diff options
Diffstat (limited to 'Source/bzip2/compress.c')
-rwxr-xr-x | Source/bzip2/compress.c | 1338 |
1 files changed, 669 insertions, 669 deletions
diff --git a/Source/bzip2/compress.c b/Source/bzip2/compress.c index 67485f4..6263c5f 100755 --- a/Source/bzip2/compress.c +++ b/Source/bzip2/compress.c @@ -1,669 +1,669 @@ -/*
- * This file is a part of the bzip2 compression module for NSIS.
- *
- * Copyright and license information can be found below.
- * Modifications Copyright (C) 1999-2007 Nullsoft and Contributors
- *
- * The original zlib source code is available at
- * http://www.bzip.org/
- *
- * This modification is not compatible with the original bzip2.
- *
- * This software is provided 'as-is', without any express or implied
- * warranty.
- */
-
-/*-------------------------------------------------------------*/
-/*--- Compression machinery (not incl block sorting) ---*/
-/*--- compress.c ---*/
-/*-------------------------------------------------------------*/
-
-/*--
- This file is a part of bzip2 and/or libbzip2, a program and
- library for lossless, block-sorting data compression.
-
- Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. The origin of this software must not be misrepresented; you must
- not claim that you wrote the original software. If you use this
- software in a product, an acknowledgment in the product
- documentation would be appreciated but is not required.
-
- 3. Altered source versions must be plainly marked as such, and must
- not be misrepresented as being the original software.
-
- 4. The name of the author may not be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- Julian Seward, Cambridge, UK.
- jseward@acm.org
- bzip2/libbzip2 version 1.0 of 21 March 2000
-
- This program is based on (at least) the work of:
- Mike Burrows
- David Wheeler
- Peter Fenwick
- Alistair Moffat
- Radford Neal
- Ian H. Witten
- Robert Sedgewick
- Jon L. Bentley
-
- For more information on these sources, see the manual.
---*/
-
-/*--
- CHANGES
- ~~~~~~~
- 0.9.0 -- original version.
-
- 0.9.0a/b -- no changes in this file.
-
- 0.9.0c
- * changed setting of nGroups in sendMTFValues() so as to
- do a bit better on small files
---*/
-
-#include "bzlib.h"
-
-
-/*---------------------------------------------------*/
-/*--- Bit stream I/O ---*/
-/*---------------------------------------------------*/
-
-/*---------------------------------------------------*/
-void BZ2_bsInitWrite ( EState* s )
-{
- s->bsLive = 0;
- s->bsBuff = 0;
-}
-
-
-/*---------------------------------------------------*/
-static
-void bsFinishWrite ( EState* s )
-{
- while (s->bsLive > 0) {
- s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
- s->numZ++;
- s->bsBuff <<= 8;
- s->bsLive -= 8;
- }
-}
-
-
-/*---------------------------------------------------*/
-#define bsNEEDW(nz) \
-{ \
- while (s->bsLive >= 8) { \
- s->zbits[s->numZ] \
- = (UChar)(s->bsBuff >> 24); \
- s->numZ++; \
- s->bsBuff <<= 8; \
- s->bsLive -= 8; \
- } \
-}
-
-
-/*---------------------------------------------------*/
-static void bsW ( EState* s, Int32 n, UInt32 v )
-{
- bsNEEDW ( n );
- s->bsBuff |= (v << (32 - s->bsLive - n));
- s->bsLive += n;
-}
-
-
-/*---------------------------------------------------*/
-/*static
-void bsPutUInt32 ( EState* s, UInt32 u )
-{
- bsW ( s, 8, (u >> 24) & 0xffL );
- bsW ( s, 8, (u >> 16) & 0xffL );
- bsW ( s, 8, (u >> 8) & 0xffL );
- bsW ( s, 8, u & 0xffL );
-}*/
-
-
-/*---------------------------------------------------*/
-static
-void bsPutUChar ( EState* s, UChar c )
-{
- bsW( s, 8, (UInt32)c );
-}
-
-
-/*---------------------------------------------------*/
-/*--- The back end proper ---*/
-/*---------------------------------------------------*/
-
-/*---------------------------------------------------*/
-static
-void makeMaps_e ( EState* s )
-{
- Int32 i;
- s->nInUse = 0;
- for (i = 0; i < 256; i++)
- if (s->inUse[i]) {
- s->unseqToSeq[i] = s->nInUse;
- s->nInUse++;
- }
-}
-
-
-/*---------------------------------------------------*/
-static
-void generateMTFValues ( EState* s )
-{
- UChar yy[256];
- Int32 i, j;
- Int32 zPend;
- Int32 wr;
- Int32 EOB;
-
- /*
- After sorting (eg, here),
- s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
- and
- ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
- holds the original block data.
-
- The first thing to do is generate the MTF values,
- and put them in
- ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
- Because there are strictly fewer or equal MTF values
- than block values, ptr values in this area are overwritten
- with MTF values only when they are no longer needed.
-
- The final compressed bitstream is generated into the
- area starting at
- (UChar*) (&((UChar*)s->arr2)[s->nblock])
-
- These storage aliases are set up in bzCompressInit(),
- except for the last one, which is arranged in
- compressBlock().
- */
- UInt32* ptr = s->ptr;
- UChar* block = s->block;
- UInt16* mtfv = s->mtfv;
-
- makeMaps_e ( s );
- EOB = s->nInUse+1;
-
- for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
-
- wr = 0;
- zPend = 0;
- for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
-
- for (i = 0; i < s->nblock; i++) {
- UChar ll_i;
- AssertD ( wr <= i, "generateMTFValues(1)" );
- j = ptr[i]-1; if (j < 0) j += s->nblock;
- ll_i = s->unseqToSeq[block[j]];
- AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
-
- if (yy[0] == ll_i) {
- zPend++;
- } else {
-
- if (zPend > 0) {
- zPend--;
- while (True) {
- if (zPend & 1) {
- mtfv[wr] = BZ_RUNB; wr++;
- s->mtfFreq[BZ_RUNB]++;
- } else {
- mtfv[wr] = BZ_RUNA; wr++;
- s->mtfFreq[BZ_RUNA]++;
- }
- if (zPend < 2) break;
- zPend = (zPend - 2) / 2;
- };
- zPend = 0;
- }
- {
- register UChar rtmp;
- register UChar* ryy_j;
- register UChar rll_i;
- rtmp = yy[1];
- yy[1] = yy[0];
- ryy_j = &(yy[1]);
- rll_i = ll_i;
- while ( rll_i != rtmp ) {
- register UChar rtmp2;
- ryy_j++;
- rtmp2 = rtmp;
- rtmp = *ryy_j;
- *ryy_j = rtmp2;
- };
- yy[0] = rtmp;
- j = ryy_j - &(yy[0]);
- mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
- }
-
- }
- }
-
- if (zPend > 0) {
- zPend--;
- while (True) {
- if (zPend & 1) {
- mtfv[wr] = BZ_RUNB; wr++;
- s->mtfFreq[BZ_RUNB]++;
- } else {
- mtfv[wr] = BZ_RUNA; wr++;
- s->mtfFreq[BZ_RUNA]++;
- }
- if (zPend < 2) break;
- zPend = (zPend - 2) / 2;
- };
- zPend = 0;
- }
-
- mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
-
- s->nMTF = wr;
-}
-
-
-/*---------------------------------------------------*/
-#define BZ_LESSER_ICOST 0
-#define BZ_GREATER_ICOST 15
-
-static
-void sendMTFValues ( EState* s )
-{
- Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
- Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
- Int32 nGroups, nBytes;
-
- /*--
- UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- is a global since the decoder also needs it.
-
- Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- are also globals only used in this proc.
- Made global to keep stack frame size small.
- --*/
-
-
- UInt16 cost[BZ_N_GROUPS];
- Int32 fave[BZ_N_GROUPS];
-
- UInt16* mtfv = s->mtfv;
-
-
- alphaSize = s->nInUse+2;
- for (t = 0; t < BZ_N_GROUPS; t++)
- for (v = 0; v < alphaSize; v++)
- s->len[t][v] = BZ_GREATER_ICOST;
-
- /*--- Decide how many coding tables to use ---*/
- AssertH ( s->nMTF > 0, 3001 );
- if (s->nMTF < 200) nGroups = 2; else
- if (s->nMTF < 600) nGroups = 3; else
- if (s->nMTF < 1200) nGroups = 4; else
- if (s->nMTF < 2400) nGroups = 5; else
- nGroups = 6;
-
- /*--- Generate an initial set of coding tables ---*/
- {
- Int32 nPart, remF, tFreq, aFreq;
-
- nPart = nGroups;
- remF = s->nMTF;
- gs = 0;
- while (nPart > 0) {
- tFreq = remF / nPart;
- ge = gs-1;
- aFreq = 0;
- while (aFreq < tFreq && ge < alphaSize-1) {
- ge++;
- aFreq += s->mtfFreq[ge];
- }
-
- if (ge > gs
- && nPart != nGroups && nPart != 1
- && ((nGroups-nPart) % 2 == 1)) {
- aFreq -= s->mtfFreq[ge];
- ge--;
- }
-
- for (v = 0; v < alphaSize; v++)
- if (v >= gs && v <= ge)
- s->len[nPart-1][v] = BZ_LESSER_ICOST; else
- s->len[nPart-1][v] = BZ_GREATER_ICOST;
-
- nPart--;
- gs = ge+1;
- remF -= aFreq;
- }
- }
-
- /*---
- Iterate up to BZ_N_ITERS times to improve the tables.
- ---*/
- for (iter = 0; iter < BZ_N_ITERS; iter++) {
-
- for (t = 0; t < nGroups; t++) fave[t] = 0;
-
- for (t = 0; t < nGroups; t++)
- for (v = 0; v < alphaSize; v++)
- s->rfreq[t][v] = 0;
-
- /*---
- Set up an auxiliary length table which is used to fast-track
- the common case (nGroups == 6).
- ---*/
- if (nGroups == 6) {
- for (v = 0; v < alphaSize; v++) {
- s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
- s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
- s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
- }
- }
-
- nSelectors = 0;
- totc = 0;
- gs = 0;
- while (True) {
-
- /*--- Set group start & end marks. --*/
- if (gs >= s->nMTF) break;
- ge = gs + BZ_G_SIZE - 1;
- if (ge >= s->nMTF) ge = s->nMTF-1;
-
- /*--
- Calculate the cost of this group as coded
- by each of the coding tables.
- --*/
- for (t = 0; t < nGroups; t++) cost[t] = 0;
-
- if (nGroups == 6 && 50 == ge-gs+1) {
- /*--- fast track the common case ---*/
- register UInt32 cost01, cost23, cost45;
- register UInt16 icv;
- cost01 = cost23 = cost45 = 0;
-
-# define BZ_ITER(nn) \
- icv = mtfv[gs+(nn)]; \
- cost01 += s->len_pack[icv][0]; \
- cost23 += s->len_pack[icv][1]; \
- cost45 += s->len_pack[icv][2]; \
-
- BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
- BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
- BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
- BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
- BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
- BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
- BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
- BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
- BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
- BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
-
-# undef BZ_ITER
-
- cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
- cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
- cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
-
- } else {
- /*--- slow version which correctly handles all situations ---*/
- for (i = gs; i <= ge; i++) {
- UInt16 icv = mtfv[i];
- for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
- }
- }
-
- /*--
- Find the coding table which is best for this group,
- and record its identity in the selector table.
- --*/
- bc = 999999999; bt = -1;
- for (t = 0; t < nGroups; t++)
- if (cost[t] < bc) { bc = cost[t]; bt = t; };
- totc += bc;
- fave[bt]++;
- s->selector[nSelectors] = bt;
- nSelectors++;
-
- /*--
- Increment the symbol frequencies for the selected table.
- --*/
- if (nGroups == 6 && 50 == ge-gs+1) {
- /*--- fast track the common case ---*/
-
-# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
-
- BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
- BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
- BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
- BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
- BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
- BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
- BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
- BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
- BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
- BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
-
-# undef BZ_ITUR
-
- } else {
- /*--- slow version which correctly handles all situations ---*/
- for (i = gs; i <= ge; i++)
- s->rfreq[bt][ mtfv[i] ]++;
- }
-
- gs = ge+1;
- }
-
- /*--
- Recompute the tables based on the accumulated frequencies.
- --*/
- for (t = 0; t < nGroups; t++)
- BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
- alphaSize, 20 );
- }
-
-
- AssertH( nGroups < 8, 3002 );
- AssertH( nSelectors < 32768 &&
- nSelectors <= (2 + (NSIS_COMPRESS_BZIP2_LEVEL * 100000 / BZ_G_SIZE)),
- 3003 );
-
-
- /*--- Compute MTF values for the selectors. ---*/
- {
- UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
- for (i = 0; i < nGroups; i++) pos[i] = i;
- for (i = 0; i < nSelectors; i++) {
- ll_i = s->selector[i];
- j = 0;
- tmp = pos[j];
- while ( ll_i != tmp ) {
- j++;
- tmp2 = tmp;
- tmp = pos[j];
- pos[j] = tmp2;
- };
- pos[0] = tmp;
- s->selectorMtf[i] = j;
- }
- };
-
- /*--- Assign actual codes for the tables. --*/
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
- if (s->len[t][i] < minLen) minLen = s->len[t][i];
- }
- AssertH ( !(maxLen > 20), 3004 );
- AssertH ( !(minLen < 1), 3005 );
- BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
- minLen, maxLen, alphaSize );
- }
-
- /*--- Transmit the mapping table. ---*/
- {
- Bool inUse16[16];
- for (i = 0; i < 16; i++) {
- inUse16[i] = False;
- for (j = 0; j < 16; j++)
- if (s->inUse[i * 16 + j]) inUse16[i] = True;
- }
-
- nBytes = s->numZ;
- for (i = 0; i < 16; i++)
- if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
-
- for (i = 0; i < 16; i++)
- if (inUse16[i])
- for (j = 0; j < 16; j++) {
- if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
- }
-
- }
-
- /*--- Now the selectors. ---*/
- nBytes = s->numZ;
- bsW ( s, 3, nGroups );
- bsW ( s, 15, nSelectors );
- for (i = 0; i < nSelectors; i++) {
- for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
- bsW(s,1,0);
- }
-
- /*--- Now the coding tables. ---*/
- nBytes = s->numZ;
-
- for (t = 0; t < nGroups; t++) {
- Int32 curr = s->len[t][0];
- bsW ( s, 5, curr );
- for (i = 0; i < alphaSize; i++) {
- while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
- while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
- bsW ( s, 1, 0 );
- }
- }
-
-
- /*--- And finally, the block data proper ---*/
- nBytes = s->numZ;
- selCtr = 0;
- gs = 0;
- while (True) {
- if (gs >= s->nMTF) break;
- ge = gs + BZ_G_SIZE - 1;
- if (ge >= s->nMTF) ge = s->nMTF-1;
- AssertH ( s->selector[selCtr] < nGroups, 3006 );
-
- if (nGroups == 6 && 50 == ge-gs+1) {
- /*--- fast track the common case ---*/
- UInt16 mtfv_i;
- UChar* s_len_sel_selCtr
- = &(s->len[s->selector[selCtr]][0]);
- Int32* s_code_sel_selCtr
- = &(s->code[s->selector[selCtr]][0]);
-
-# define BZ_ITAH(nn) \
- mtfv_i = mtfv[gs+(nn)]; \
- bsW ( s, \
- s_len_sel_selCtr[mtfv_i], \
- s_code_sel_selCtr[mtfv_i] )
-
- BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
- BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
- BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
- BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
- BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
- BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
- BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
- BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
- BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
- BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
-
-# undef BZ_ITAH
-
- } else {
- /*--- slow version which correctly handles all situations ---*/
- for (i = gs; i <= ge; i++) {
- bsW ( s,
- s->len [s->selector[selCtr]] [mtfv[i]],
- s->code [s->selector[selCtr]] [mtfv[i]] );
- }
- }
-
-
- gs = ge+1;
- selCtr++;
- }
- AssertH( selCtr == nSelectors, 3007 );
-
-}
-
-
-/*---------------------------------------------------*/
-void BZ2_compressBlock ( EState* s, Bool is_last_block )
-{
- if (s->nblock > 0) {
-
- if (s->blockNo > 1) s->numZ = 0;
-
-
- BZ2_blockSort ( s );
- }
-
- s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
-
- /*-- If this is the first block, create the stream header. --*/
- if (s->blockNo == 1) {
- BZ2_bsInitWrite ( s );
- }
-
- if (s->nblock > 0) {
-
- bsPutUChar ( s, 0x31 );
-
- bsW ( s, 24, s->origPtr );
- generateMTFValues ( s );
- sendMTFValues ( s );
- }
-
-
- /*-- If this is the last block, add the stream trailer. --*/
- if (is_last_block) {
-
- bsPutUChar ( s, 0x17 );
- bsFinishWrite ( s );
- }
-}
-
-
-/*-------------------------------------------------------------*/
-/*--- end compress.c ---*/
-/*-------------------------------------------------------------*/
+/* + * This file is a part of the bzip2 compression module for NSIS. + * + * Copyright and license information can be found below. + * Modifications Copyright (C) 1999-2007 Nullsoft and Contributors + * + * The original zlib source code is available at + * http://www.bzip.org/ + * + * This modification is not compatible with the original bzip2. + * + * This software is provided 'as-is', without any express or implied + * warranty. + */ + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/*-- + This file is a part of bzip2 and/or libbzip2, a program and + library for lossless, block-sorting data compression. + + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Julian Seward, Cambridge, UK. + jseward@acm.org + bzip2/libbzip2 version 1.0 of 21 March 2000 + + This program is based on (at least) the work of: + Mike Burrows + David Wheeler + Peter Fenwick + Alistair Moffat + Radford Neal + Ian H. Witten + Robert Sedgewick + Jon L. Bentley + + For more information on these sources, see the manual. +--*/ + +/*-- + CHANGES + ~~~~~~~ + 0.9.0 -- original version. + + 0.9.0a/b -- no changes in this file. + + 0.9.0c + * changed setting of nGroups in sendMTFValues() so as to + do a bit better on small files +--*/ + +#include "bzlib.h" + + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +void BZ2_bsInitWrite ( EState* s ) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static +void bsFinishWrite ( EState* s ) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +#define bsNEEDW(nz) \ +{ \ + while (s->bsLive >= 8) { \ + s->zbits[s->numZ] \ + = (UChar)(s->bsBuff >> 24); \ + s->numZ++; \ + s->bsBuff <<= 8; \ + s->bsLive -= 8; \ + } \ +} + + +/*---------------------------------------------------*/ +static void bsW ( EState* s, Int32 n, UInt32 v ) +{ + bsNEEDW ( n ); + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +/*static +void bsPutUInt32 ( EState* s, UInt32 u ) +{ + bsW ( s, 8, (u >> 24) & 0xffL ); + bsW ( s, 8, (u >> 16) & 0xffL ); + bsW ( s, 8, (u >> 8) & 0xffL ); + bsW ( s, 8, u & 0xffL ); +}*/ + + +/*---------------------------------------------------*/ +static +void bsPutUChar ( EState* s, UChar c ) +{ + bsW( s, 8, (UInt32)c ); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e ( EState* s ) +{ + Int32 i; + s->nInUse = 0; + for (i = 0; i < 256; i++) + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } +} + + +/*---------------------------------------------------*/ +static +void generateMTFValues ( EState* s ) +{ + UChar yy[256]; + Int32 i, j; + Int32 zPend; + Int32 wr; + Int32 EOB; + + /* + After sorting (eg, here), + s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, + and + ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] + holds the original block data. + + The first thing to do is generate the MTF values, + and put them in + ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. + Because there are strictly fewer or equal MTF values + than block values, ptr values in this area are overwritten + with MTF values only when they are no longer needed. + + The final compressed bitstream is generated into the + area starting at + (UChar*) (&((UChar*)s->arr2)[s->nblock]) + + These storage aliases are set up in bzCompressInit(), + except for the last one, which is arranged in + compressBlock(). + */ + UInt32* ptr = s->ptr; + UChar* block = s->block; + UInt16* mtfv = s->mtfv; + + makeMaps_e ( s ); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; + + for (i = 0; i < s->nblock; i++) { + UChar ll_i; + AssertD ( wr <= i, "generateMTFValues(1)" ); + j = ptr[i]-1; if (j < 0) j += s->nblock; + ll_i = s->unseqToSeq[block[j]]; + AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); + + if (yy[0] == ll_i) { + zPend++; + } else { + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + { + register UChar rtmp; + register UChar* ryy_j; + register UChar rll_i; + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while ( rll_i != rtmp ) { + register UChar rtmp2; + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; + } + + } + } + + if (zPend > 0) { + zPend--; + while (True) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + + mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static +void sendMTFValues ( EState* s ) +{ + Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; + Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; + Int32 nGroups, nBytes; + + /*-- + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + is a global since the decoder also needs it. + + Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + are also globals only used in this proc. + Made global to keep stack frame size small. + --*/ + + + UInt16 cost[BZ_N_GROUPS]; + Int32 fave[BZ_N_GROUPS]; + + UInt16* mtfv = s->mtfv; + + + alphaSize = s->nInUse+2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH ( s->nMTF > 0, 3001 ); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + Int32 nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs-1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups-nPart) % 2 == 1)) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge+1; + remF -= aFreq; + } + } + + /*--- + Iterate up to BZ_N_ITERS times to improve the tables. + ---*/ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + + for (t = 0; t < nGroups; t++) fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + + /*--- + Set up an auxiliary length table which is used to fast-track + the common case (nGroups == 6). + ---*/ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } + + nSelectors = 0; + totc = 0; + gs = 0; + while (True) { + + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + + /*-- + Calculate the cost of this group as coded + by each of the coding tables. + --*/ + for (t = 0; t < nGroups; t++) cost[t] = 0; + + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + register UInt32 cost01, cost23, cost45; + register UInt16 icv; + cost01 = cost23 = cost45 = 0; + +# define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; \ + + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); + +# undef BZ_ITER + + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + UInt16 icv = mtfv[i]; + for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; + } + } + + /*-- + Find the coding table which is best for this group, + and record its identity in the selector table. + --*/ + bc = 999999999; bt = -1; + for (t = 0; t < nGroups; t++) + if (cost[t] < bc) { bc = cost[t]; bt = t; }; + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /*-- + Increment the symbol frequencies for the selected table. + --*/ + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + +# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ + + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); + +# undef BZ_ITUR + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) + s->rfreq[bt][ mtfv[i] ]++; + } + + gs = ge+1; + } + + /*-- + Recompute the tables based on the accumulated frequencies. + --*/ + for (t = 0; t < nGroups; t++) + BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), + alphaSize, 20 ); + } + + + AssertH( nGroups < 8, 3002 ); + AssertH( nSelectors < 32768 && + nSelectors <= (2 + (NSIS_COMPRESS_BZIP2_LEVEL * 100000 / BZ_G_SIZE)), + 3003 ); + + + /*--- Compute MTF values for the selectors. ---*/ + { + UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + for (i = 0; i < nGroups; i++) pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while ( ll_i != tmp ) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH ( !(maxLen > 20), 3004 ); + AssertH ( !(minLen < 1), 3005 ); + BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), + minLen, maxLen, alphaSize ); + } + + /*--- Transmit the mapping table. ---*/ + { + Bool inUse16[16]; + for (i = 0; i < 16; i++) { + inUse16[i] = False; + for (j = 0; j < 16; j++) + if (s->inUse[i * 16 + j]) inUse16[i] = True; + } + + nBytes = s->numZ; + for (i = 0; i < 16; i++) + if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); + + for (i = 0; i < 16; i++) + if (inUse16[i]) + for (j = 0; j < 16; j++) { + if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); + } + + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW ( s, 3, nGroups ); + bsW ( s, 15, nSelectors ); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); + bsW(s,1,0); + } + + /*--- Now the coding tables. ---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + Int32 curr = s->len[t][0]; + bsW ( s, 5, curr ); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; + bsW ( s, 1, 0 ); + } + } + + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (True) { + if (gs >= s->nMTF) break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) ge = s->nMTF-1; + AssertH ( s->selector[selCtr] < nGroups, 3006 ); + + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + UInt16 mtfv_i; + UChar* s_len_sel_selCtr + = &(s->len[s->selector[selCtr]][0]); + Int32* s_code_sel_selCtr + = &(s->code[s->selector[selCtr]][0]); + +# define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW ( s, \ + s_len_sel_selCtr[mtfv_i], \ + s_code_sel_selCtr[mtfv_i] ) + + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); + +# undef BZ_ITAH + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + bsW ( s, + s->len [s->selector[selCtr]] [mtfv[i]], + s->code [s->selector[selCtr]] [mtfv[i]] ); + } + } + + + gs = ge+1; + selCtr++; + } + AssertH( selCtr == nSelectors, 3007 ); + +} + + +/*---------------------------------------------------*/ +void BZ2_compressBlock ( EState* s, Bool is_last_block ) +{ + if (s->nblock > 0) { + + if (s->blockNo > 1) s->numZ = 0; + + + BZ2_blockSort ( s ); + } + + s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + BZ2_bsInitWrite ( s ); + } + + if (s->nblock > 0) { + + bsPutUChar ( s, 0x31 ); + + bsW ( s, 24, s->origPtr ); + generateMTFValues ( s ); + sendMTFValues ( s ); + } + + + /*-- If this is the last block, add the stream trailer. --*/ + if (is_last_block) { + + bsPutUChar ( s, 0x17 ); + bsFinishWrite ( s ); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ |