summaryrefslogtreecommitdiff
path: root/msexpand
blob: fe39f44c94cca5577356cb08157f262d4ebc6050 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/perl
# msexpand written by Paul Laufer, 2001, to help him learn perl ;)
# - added fillup with zeros if expanded file is less than original;
# - added binmode() to work on platforms which need it; by G. Knauf.
# last change: 10-May-2002  gk.
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Microsoft compress file structure:
# Uses a 12 bit (4k) sliding window Lempel Ziv variant.
#
# header: 14 bytes
# 	int MAGIC1 = 0x44445a53; // "SZDD"
# 	int MAGIC2 = 0x3327f088;
# 	char MAGIC3 = 0x41;
# 	char last_char_of_filename; // offset 0x09
# 	unsigned short int size_low;
# 	unsigned short int size_high;
#
# data portion:
# Flag byte followed by eight data elements, for each of the eight flag bits.
# If a flag bit is 1, the corresponding element is a data byte (just copied).
# If the flag bit is 0, the corresponding element is a code, comprised of two
# bytes.  The first byte holds the low 8 bits of the offset into the 4k window;
# the upper 4 bits of the second byte are the high 4 bits of that offset, and
# its lower 4 bits are the length of the string, minus 3. Thus the string
# length is between 3 and 18 bytes.

# Flag byte   Eight data elements
# 10010011
# |||||||+----byte
# ||||||+-----byte
# |||||+------code
# ||||+-------code
# |||+--------byte
# ||+---------code
# |+----------code
# +-----------byte
#
# The window is wrapped, ie: with an offset of 4092 and a length of 10, you
# will get the last 4 bytes then the first 6 bytes of the window in the output.
#
# Of course, the window must be updated with expanded strings and bytes as they
# are read. Oh, yeah, the window needs to be initialized with spaces, not
# zeros.

# Signature values expected in the header of a Microsoft Compress file.
($MAGIC1, $MAGIC2, $MAGIC3) = (
	0x44445a53,	# first int of the header, "SZDD"
	0x3327f088,	# second int of the header
	0x41,		# the char just before the last-filename-character field
);

# Window Size, then Size of file header (both in bytes)
($WINSIZE, $HEADSIZE) = (4096, 14);

# Match length encoded in the second byte of a code: the low nibble plus 3,
# so the result is always in the range 3..18.
sub LENGTH {
	my ($code_byte) = @_;
	my $low_nibble = $code_byte & 0x0F;
	return $low_nibble + 3;
}
# Window offset encoded in the two bytes of a code.  The first byte supplies
# the low 8 bits and the high nibble of the second byte supplies bits 8-11;
# 0x10 is then added and the sum is reduced to 12 bits.
sub OFFSET {
	my ($first_byte, $second_byte) = @_;
	my $high_bits = ($second_byte & 0xF0) << 4;
	my $biased    = $high_bits + $first_byte + 0x0010;
	return $biased & 0x0FFF;
}
# Reduce a position to an index inside the sliding window by masking it with
# $WINSIZE - 1.
sub WRAPFIX {
	my ($position) = @_;
	my $mask = $WINSIZE - 1;
	return $position & $mask;
}
# True when bit number $bit (0 = least significant) is set in $byte.
sub BITSET {
	my ($byte, $bit) = @_;
	my $mask = 1 << $bit;
	return ($byte & $mask) > 0;
}
# Expand the Lempel-Ziv coded payload of a Microsoft Compress file.
# (Originally a direct translation of the author's C source.)
#
# Arguments:
#   $input       - the whole compressed file as a byte string, header included
#   $size_uncomp - expanded size announced by the header; output stops there
# Returns the number of bytes written.
#
# The expanded bytes are printed to the global OUTFILE handle, which the
# caller must already have opened.  $HEADSIZE and $WINSIZE are read from the
# file-level settings, and the bit twiddling is delegated to the LENGTH,
# OFFSET, WRAPFIX and BITSET helpers.
sub LZ_expand {
	my ($input, $size_uncomp) = @_;
	my $size_in  = length($input);	# end of the compressed data
	my $index    = $HEADSIZE;	# read position, starts after the header
	my $curr_pos = 0;		# number of bytes produced so far

	# The window stores byte values (numbers), so it has to be primed with
	# the character code of a space.  A ' ' string would be turned into a
	# NUL byte when it is later written out as a character.
	my @window = (0x20) x $WINSIZE;

	GROUP:
	while ($curr_pos < $size_uncomp && $index < $size_in) {
		# One flag byte announces up to eight elements, lowest bit first.
		my $bit_map = unpack('C', substr($input, $index, 1));
		$index++;

		for my $bit (0 .. 7) {
			last GROUP if $curr_pos >= $size_uncomp;

			if (BITSET($bit_map, $bit)) {
				# Its just a data byte
				last GROUP if $index >= $size_in;
				my $byte = unpack('C', substr($input, $index, 1));
				$index++;
				$window[WRAPFIX($curr_pos)] = $byte;
				print OUTFILE chr($byte);
				$curr_pos++;
			} else {
				# Its a code.  Only give up when the two bytes are
				# not completely there; a code that ends exactly at
				# the end of the input is still valid and must be
				# expanded.
				last GROUP if $index + 2 > $size_in;
				my ($byte1, $byte2) =
					unpack('CC', substr($input, $index, 2));
				$index += 2;

				my $length   = LENGTH($byte2);
				my $location = OFFSET($byte1, $byte2);

				# Copy byte by byte: source and destination may
				# overlap, and each copied byte must be visible to
				# the following reads.  Never exceed the size the
				# header announced.
				while ($length > 0 && $curr_pos < $size_uncomp) {
					my $byte = $window[WRAPFIX($location)];
					$window[WRAPFIX($curr_pos)] = $byte;
					print OUTFILE chr($byte);
					$curr_pos++;
					$location++;
					$length--;
				}
			}
		}
	}

	return $curr_pos;
}

# start here
#
# Command-line driver: slurp the file named by the first argument, validate
# its header, derive the output file name from the stored last character,
# expand the data into that file and report the sizes.

if (@ARGV < 1) {
  print "Microsoft Compressed File Expander\n";
  print "Written by Paul Laufer 2001-03-13\n\n";
  print "Usage:\n\tmsexpand.pl FILE\n\n";
  print "Where FILE is a valid Microsoft Compressed file. Files of this type\nusually have the last letter replaced with an underscore, ie rmquasar.vx_.\nThe expanded file will be the original filename with the last underscore\nreplaced with the original letter, ie rmquasar.vxd.\n\n";
  exit(1);
}

my $infile = $ARGV[0];

# Three-argument open: characters such as '<', '>' or '|' in the file name
# must never be interpreted as part of the open mode.
open(INFILE, '<', $infile) or die "Can't open file for input: $!";
binmode(INFILE);
my $input;
{
	local $/;	# slurp mode, restored when this block ends
	$input = <INFILE>;
}
close(INFILE);
$input = '' unless defined $input;

# A file shorter than the header cannot be decoded at all.
if (length($input) < $HEADSIZE) {
	print "Error: Input file is not a Microsoft Compress format.\n";
	exit(1);
}

# V/v are fixed-width little-endian fields; the native I/S templates would
# misread the header on big-endian hosts.
my ($magic1, $magic2, $magic3, $lastchar, $size_low, $size_high) =
	unpack 'VVCa1vv', substr($input, 0, $HEADSIZE);

if( $magic1 != $MAGIC1 || $magic2 != $MAGIC2 || $magic3 != $MAGIC3 ) {
	print "Error: Input file is not a Microsoft Compress format.\n";
	exit(1);
} else {
	print "Input file appears to be Microsoft Compress format, proceeding\n";
}

# The output name is the input name with its last character replaced by the
# one stored in the header.
my $outfile = $infile;
substr($outfile, -1, 1) = $lastchar;

print "Output filename = ", $outfile, "\n";
my $size_uncomp = ($size_high << 0x10) + $size_low;
# Deliberately a package variable (not "my"): subs defined earlier in the
# file can only see it that way.
$size_orig = length($input);
print "Original file size: ", $size_uncomp, " bytes\n";
if ($size_uncomp > 0) {
	printf "Compression ratio: %.1f%%\n", $size_orig*100/$size_uncomp;
} else {
	# The header announces an empty file; a ratio cannot be computed.
	print "Compression ratio: n/a\n";
}

# OUTFILE stays a bareword handle because LZ_expand prints to it by name.
open(OUTFILE, '>', $outfile) or die "Can't open file for output: $!";
binmode(OUTFILE);
# Time to start expanding the file
my $size_expand = LZ_expand($input, $size_uncomp);
printf("Expanded size: %d bytes\n", $size_expand);
# Fill up with zeros if the expanded data is shorter than announced.
print OUTFILE "\x0" x ($size_uncomp - $size_expand) if ($size_expand < $size_uncomp);
# Buffered write errors only surface when the handle is closed.
close(OUTFILE) or die "Can't write output file: $!";

printf("Output file size: %d bytes\n", (stat($outfile))[7]);