diff options
Diffstat (limited to 'src/testpattern/compress-checksums.in')
-rw-r--r-- | src/testpattern/compress-checksums.in | 617 |
1 file changed, 524 insertions, 93 deletions
diff --git a/src/testpattern/compress-checksums.in b/src/testpattern/compress-checksums.in index 19af2f1..97d3574 100644 --- a/src/testpattern/compress-checksums.in +++ b/src/testpattern/compress-checksums.in @@ -1,5 +1,5 @@ #!@PERL@ -## Copyright (C) 2016 Robert Krawitz +## Copyright (C) 2016-2017 Robert Krawitz ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -12,52 +12,225 @@ ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +## along with this program. If not, see <https://www.gnu.org/licenses/>. + +# Compress Gutenprint regression test results +# +# The Gutenprint regression test uses run-testpattern-2 to generate +# hashes for each test case it runs. It currently runs about 370,000 +# cases and generates 75 MB of output (if we used a less...verbose +# hash than SHA-512, we could save a fair bit of that, but it would +# still be something in the range of 40 MB). This is too big to store +# for very long, or to store in the repo. +# +# But there's a lot of compression we can do, some of it lossy and +# some lossless, to greatly reduce the data volume. Obviously, we can +# use conventional lossless compression (such as bzip2), but by +# understanding what we're trying to accomplish we can do much better. +# +# The three important points are: +# +# 1) There's a lot of duplication (a lot of test cases generate the +# same output). Some of this is because a lot of printers actually +# use the same driver, some of it is because in some cases +# different options don't affect the output, and some of it is +# because under many conditions different drivers generate the same +# output. 
Of the 369195 test cases as of 5.3.0-pre1, there are +# only 17701 distinct outputs, as measured by counting the number +# of distinct hash values. +# +# 2) We don't need the entire hash value, only enough to determine +# with reasonable reliability that a particular test case's output +# has not changed between runs. If we keep, say, 32 bits of hash +# value, we're going to miss a change in output only once in every +# 4 billion cases. For this purpose, that's good enough. So we +# keep only enough hash to distinguish between all of the hash +# values in the test run. That's usually between 7 and 9 bytes +# (28-36 bits). If we miss a change in output once in 256 million +# cases, that should be OK. And for that matter, if we sort the +# hashes, we only need to keep the delta between adjacent hash +# value; leading hex digits in common can be omitted. +# +# 3) We don't care about preserving the order of the test runs. It +# doesn't matter at all if the test runs show up in different +# order; all we need to do is match them up case by case to see +# what has changed. The cases are all independent. +# +# We also know about the structure of the test data. Each test result +# is stored as <hash, name>. The names themselves are structured as +# <printer_option0_value0_option1_value1...>. Since a lot of +# components are preserved from case to case, we can do a lot of +# common component elimination and prefix/suffix removal. +# +# It turns out that we can achieve better compression by putting the +# printer name on the end of the case rather than the beginning, since +# in many cases many printers share the same information. The +# original format puts the printer name first, but no reason we can't +# reorder it for storage, as long as we can undo it later when we +# compare the results. +# +# Surprisingly (perhaps) after all of these machinations the data +# still doesn't have lot of entropy; bzip2 compresses even our best +# work by 90%. 
And bzip is far from the best compression out there. +# The best that I have found is zpaq +# (http://mattmahoney.net/dc/zpaq.html). It's very slow but very, +# very effective. The zpaq utility is set up for archiving rather +# than file compression, but that's nothing that a little scripting +# can't get around. For our test data, zpaq gets another 35-45%, +# depending upon compression level (initial completely uncompressed +# data is another story; zpaq is about 70% better!). + +# Compression levels (cumulative): +# +# 0 - none +# 1 - elide duplicate hashes (LOSSY -- reorders data) +# 2 - use global minimum hash length prefix (LOSSY -- reduces hash data) +# 3 - eliminate common second level groups (run encoding) +# 4 - use pairwise minimum hash length prefix +# 5 - use substitution table for common option names +# 6 - use pre-substitution table for certain value names (LOSSY -- reorder) +# 7 - common prefix/suffix elimination for chunks +# - pack multiple consecutive identical chunks +# - eliminate BW/color prefix +# 8 - replace space (inter-record gaps) with high bit set on last byte +# 9 - compact subexpression elimination (7a) + +# There are 9 defined compression levels to match usage of other +# compression programs. 
+ +# 369195 test cases in the sample run +# CPU: Intel Xeon E3-1505M, Lenovo P70 + +# Method Size Packed Time Ratio Packed bzip2 Bits/case +# 0 75512435 1970522 0.08 1.0 38.3 6998640 42.69 +# 1 11066295 1316628 2.82 6.8 57.3 1620995 28.52 +# 2 8924474 228519 2.94 8.4 330.4 453281 4.95 +# 3 5925516 216037 3.16 11.3 349.5 383805 4.68 +# 4 5874446 205435 3.22 11.4 367.5 354349 4.45 +# 5 5306533 195594 3.61 14.2 386.0 316775 4.24 +# 6 4788102 182921 3.88 15.7 412.8 290248 3.96 +# 7b 3266449 180295 23.1 417.3 221237 3.91 +# 7a 2401276 126804 31.4 548.7 200778 2.75 +# 7 2312771 126487 4.67 32.6 595.5 200371 2.74 +# 8 2110942 122762 5.12 35.7 615.1 197529 2.66 +# 9 1891341 121970 5.30 39.9 619.1 187040 2.64 + +# Currently reserved special characters: +# +# ( ( ) & ? ) reserved for level 3. +# ; is reserved for level 5. +# Braces {} are reserved for level 6. +# (* + \ /) are reserved for level 7. +# (= ! " , ' % >) are reserved for level 7 (7a) +# High bit set is reserved for level 8. +# Non-printable ASCII (0-31) is reserved for level 9. +# (- _) are used as parts of tokens and token separators. +# | is reserved for use as a temporary. + +# Non-alphanumeric characters currently unreserved are: +# ` ~ @ # $ ^ [ ] < use strict; +use Getopt::Long; +Getopt::Long::Configure("bundling", "no_ignore_case", "pass_through"); + +my ($comp_level) = 9; my (%checksums); -while (<>) { - chomp; - my ($sum, @keys) = split; - my (@pchunks) = (); - foreach my $k (@keys) { - my (@chunks) = split(/_/, $k, -1); - foreach my $i (0..$#chunks) { - if ($chunks[$i] eq '') { - $chunks[$i] = $pchunks[$i]; - } elsif ($chunks[$i] =~ /^([0-9]+)\+(.*)/) { - $chunks[$i] = substr($pchunks[$i], 0, $1) . $2; - } - } - my $key = join('_', @chunks), '.prn'; - if (! 
defined $checksums{$sum}) { - $checksums{$sum} = []; - } - push @{$checksums{$sum}}, $key; - } +my (@ptable) = + (["=\\+", pack("C", 27)], + ["!\\+", pack("C", 26)], + ['"\+', pack("C", 25)], + [",\\+", pack("C", 24)], + ["'\\+", pack("C", 23)], + ["%\\+", pack("C", 22)], + [">\\+", pack("C", 21)], + ["20\\+", pack("C", 20)], + ["19\\+", pack("C", 19)], + ["18\\+", pack("C", 18)], + ["17\\+", pack("C", 17)], + ["16\\+", pack("C", 16)], + ["15\\+", pack("C", 15)], + ["14\\+", pack("C", 14)], + ["13\\+", pack("C", 13)], + ["12\\+", pack("C", 12)], + ["11\\+", pack("C", 11)], + ["10\\+", pack("C", 9)], # 0xa is newline! + ["9\\+", pack("C", 8)], + ["8\\+", pack("C", 7)], + ["7\\+", pack("C", 6)], + ["6\\+", pack("C", 5)], + ["5\\+", pack("C", 4)], + ["4\\+", pack("C", 3)], + ["3\\+", pack("C", 2)], + ["2\\+", pack("C", 1)]); + +my (%asc_table); +map { $asc_table{pack("C", $_)} = pack("C", $_ + 128); } (0..127); + +my $psub_code = <<\EOF; +sub psub($) { + my ($out) = @_; +EOF +foreach my $psub (@ptable) { + $psub_code .= " \$out =~ s/$$psub[0]/$$psub[1]/go;\n"; +} +$psub_code .= <<\EOF; + return $out; +} +EOF + +eval $psub_code; + +my (%sub_table) = ( + "PrintingMode" => "P;", + "PageSize" => "S;", + "DitherAlgorithm" => "Z;", + "Resolution" => "R;", + "ColorCorrection" => "C;", + "Color" => "c;", + "MediaType" => "M;", + "InputSlot" => "I;" + ); + +my ($spc) = unpack("C", " "); + +sub precompress($) { + my ($run) = @_; + # This one really doesn't belong here. This is to work around the + # (very domain-specifid) fact that most of the runs specify a dither + # algorithm, except for the ones testing the dither algorithms per se. + # The common group elimination code needs to have everything to the right + # of the second option to be fixed except for the printer name. 
+ # If dither algorithm sorts between other options, it often + # breaks up a run of option names, since if dither algorithm is the + # key option it won't appear to the right, breaking up a run of fixed + # right hand components. + # + # This has a significant effect on compression; at level 10 the + # 5.3.0-pre1 test dataset consumes 1938695 bytes without this + # hack, but 1891341 with it -- about 2.5% savings. The zpaq archive + # is 123645 bytes without and 121970 with, or 1.3%. + $run =~ s/DitherAlgorithm/Z;/go; + $run =~s/600x600dpi_(draft|high|mono|photo|std|tshirt)([^?&_]*)/{600|$1|$2}/go; + $run =~s/720x720dpi_(draft|high|mono|photo|std|tshirt)([^?&_]*)/{720|$1|$2}/go; + $run =~s/300x300dpi_(draft|high|mono|photo|std|tshirt)([^?&_]*)/{300|$1|$2}/go; + $run =~s/360x360dpi_(draft|high|mono|photo|std|tshirt)([^?&_]*)/{360|$1|$2}/go; + $run =~s/\{([0-9]*)\|([^|])[^|]*\|([^?&_]*)\}/{$1$2$3}/go; + return $run; } sub find_min_nonunique_prefix(@) { my (@sums) = sort @_; - my ($last) = ""; - my ($max_prefix) = 0; - my ($lc) = 0; + my ($last) = shift @sums; + my ($maxlen) = length $last; + my ($max_prefix) = 1; foreach my $sum (@sums) { - $lc++; - if ($last eq "") { - $last = $sum; - next; - } - foreach my $l ($max_prefix..length $sum) { - if (substr($sum, 0, $l) eq substr($last, 0, $l)) { - $max_prefix++; - } else { - next; - } + foreach my $l ($max_prefix..$maxlen) { + last if (substr($sum, 0, $l) ne substr($last, 0, $l)); + $max_prefix++; } $last = $sum; } @@ -85,32 +258,216 @@ sub get_mode($) { return $mode; } -my $min_prefix = find_min_nonunique_prefix(keys %checksums); - -my ($last_sum) = ""; +sub scan_next_block($$$$$$$) { + my ($runs, $printers, $start, $blocksize, $ovalue, $rest, $ov1) = @_; + my ($limit) = scalar @$runs; + # Don't have enough runs left + if ($start + $blocksize > $limit) { + return ""; + } + my ($run0) = $$runs[$start]; + $run0 =~ /^([^:]+):(.*)$/o; + my ($data) = $1; + my ($printer) = $2; + # Printer does not match the first printer on 
the list. + if ($printer ne $$printers[0]) { + return ""; + } + my (@elts) = split(/_/, $data); + my ($v1) = $elts[1]; + if ($v1 ne $ov1) { + return ""; + } + my ($option) = $elts[2]; + my ($value) = $elts[3]; + if ($ovalue eq $value) { + return ""; + } + my ($nrest) = join("", @elts[4..$#elts]); + if ($nrest ne $rest) { + return ""; + } + # Different option + my $i; + for ($i = 1; $i < $blocksize; $i++) { + my ($run) = $$runs[$start + $i]; + $run =~ /^([^:]+):(.*)$/o; + $data = $1; + $printer = $2; + # Different printer that what we expect! + if ($printer ne $$printers[$i]) { + return ""; + } + @elts = split(/_/, $data); + my ($nv1) = $elts[1]; + my $noption = $elts[2]; + if ($nv1 ne $v1) { + return ""; + } + # Different option than what we expect. + if ($noption ne $option) { + return ""; + } + my ($nvalue) = $elts[3]; + # ...or different value from what we expect + if ($nvalue ne $value) { + return ""; + } + # ...or something else different, perchance? + $nrest = join("", @elts[4..$#elts]); + if ($nrest ne $rest) { + return ""; + } + } + return [$option, $value]; +} -my (%sub_table); -$sub_table{"PrintingMode"} = "P;"; -$sub_table{"PageSize"} = "S;"; -$sub_table{"DitherAlgorithm"} = "D;"; -$sub_table{"Resolution"} = "R;"; -$sub_table{"ColorCorrection"} = "C;"; -$sub_table{"Color"} = "c;"; -$sub_table{"MediaType"} = "M;"; -$sub_table{"InputSlot"} = "I;"; - -sub compress_chunk($) { - my ($chunk) = @_; - if (defined $sub_table{$chunk}) { - return $sub_table{$chunk}; +# If we find a match, return the data: glommed-together option string, +# size of the block, and number of values. +# +# If we don't find a match, return how far we searched without finding +# a match, so that the compressor doesn't spend a lot of time searching +# for a match where there isn't going to be one. 
+sub scan_for_block($$) { + my ($runs, $start) = @_; + my ($first_printer); + my ($idx) = $start; + my ($limit) = scalar @$runs; + # Runs are now of the form + # PrintingMode_XX_OPT1_YY...:printer + # First scan pass: look for the first change of YY while OPT1 remains + # the same. The printer should be the same as the first printer we + # find. + my ($run0) = $$runs[$start]; + $run0 =~ /^([^:]+):(.*)$/o; + my ($data) = $1; + my ($printer) = $2; + my (@elts) = split(/_/, $data); + my ($v1) = $elts[1]; + my ($option) = $elts[2]; + my ($value) = $elts[3]; + my ($rest) = join("", @elts[4..$#elts]); + my (@printers) = ($printer); + my (@values) = ([$option, $value]); + $idx++; + while ($idx < $limit) { + my ($run) = $$runs[$idx]; + $run =~ /^([^:]+):(.*)$/o; + $data = $1; + $printer = $2; + @elts = split(/_/, $data); + my ($nv1) = $elts[1]; + my ($noption) = $elts[2]; + my ($nvalue) = $elts[3]; + if ($v1 ne $nv1) { + return ("", 0, $idx); + } + # Different option name -- don't have a match. + if ($noption ne $option) { + return ("", 0, $idx); + } + my ($nrest) = join("", @elts[4..$#elts]); + if ($nrest ne $rest) { + return ("", 0, $idx); + } + if ($printer ne $printers[0]) { + if ($nvalue ne $value) { + # Different option value, but not the same printer -- don't have + # a match. + return ("", 0, $idx); + } else { + # Same option value, new printer + push @printers, $printer; + $idx++; + } + } else { + if ($nvalue eq $value) { + return ("", 0, $idx); + } + # New option value but same printer as first -- found a block! + last; + } + } + my ($blocksize) = $#printers + 1; + if ($blocksize < 2) { + return ( "", 0, $idx ); + } + my $ovalue = ""; + while ($idx < $limit) { + $value = scan_next_block($runs, \@printers, $idx, $blocksize, $ovalue, + $rest, $v1); + if (ref $value eq "ARRAY") { + push @values, $value; + $ovalue = $value->[1]; + $idx += $blocksize; + } else { + # End of the line. 
+ if ($#values > 0) { + last; + } else { + return ( "", 0, $idx ); + } + } + } + if ($#values > 0) { + my (@optstrs); + my (@valstrs); + my ($last_option) = ""; + foreach my $item (@values) { + my ($opt) = $item->[0]; + my ($val) = $item->[1]; + if ($last_option ne $opt) { + if ($last_option ne "") { + if ($comp_level >= 5 && defined $sub_table{$last_option}) { + $last_option = $sub_table{$last_option}; + } + push @optstrs, "${last_option}_" . join("&", @valstrs); + } + $last_option = $opt; + @valstrs = (); + } + push @valstrs, $val; + } + if ($comp_level >= 5 && defined $sub_table{$last_option}) { + $last_option = $sub_table{$last_option}; + } + push @optstrs, "${last_option}_" . join("&", @valstrs);; + my ($valstr) = join("?", @optstrs); + return ( $valstr, $blocksize, $#values + 1 ); } else { - return $chunk; + return ( "", 0, $idx ); } } +GetOptions("l:i" => \$comp_level); + +if ($comp_level <= 0) { + print while (<>); + exit; +} + +while (<>) { + chomp; + my ($sum, $key) = split; + if (! 
defined $checksums{$sum}) { + $checksums{$sum} = []; + } + push @{$checksums{$sum}}, $key; +} + +my $min_prefix; + +if ($comp_level >= 2) { + $min_prefix = find_min_nonunique_prefix(keys %checksums); +} else { # $comp_level == 1 + $min_prefix = length((keys %checksums)[0]); +} + +my ($last_sum) = ""; + foreach my $sum (sort keys %checksums) { my ($out) = substr($sum, 0, $min_prefix); - if ($last_sum ne "") { + if ($last_sum ne "" && $comp_level >= 4) { $out = substr($out, find_min_nonunique_prefix($sum, $last_sum) - 1); } $last_sum = $sum; @@ -122,66 +479,140 @@ foreach my $sum (sort keys %checksums) { s/\.prn$//; my ($printer) = get_printer($_); my ($mode) = get_mode($_); + if ($comp_level >= 6) { + $mode = precompress($mode); + } "${mode}_:$printer" } @{$checksums{$sum}}; - foreach my $run (@runs) { + my ($lastdup) = 0; + my ($valstr) = ""; + my ($blocksize) = 0; + my ($valcount) = 0; + my ($stop_at) = -1; + my ($idx); + my ($found_block_first_time) = 0; + my ($next_check) = 0; + while ($idx <= $#runs) { + my $run = $runs[$idx]; my (@chunks) = split(/[_]/, $run); - @chunks = map { compress_chunk($_) } @chunks; + if ($comp_level >= 3 && $stop_at < 0 && + $idx >= $next_check && + $#runs - $idx > 2 && + $pchunks[2] ne $chunks[2]) { + ($valstr, $blocksize, $valcount) = scan_for_block(\@runs, $idx); + if ($blocksize > 0) { + $chunks[2] = ""; + $chunks[3] = $valstr; + $stop_at = $idx + $blocksize; + $found_block_first_time = 1; + } else { + $next_check = $valcount; + } + } + if ($comp_level >= 5) { + map { + if (defined $sub_table{$chunks[$_]}) { + $chunks[$_] = $sub_table{$chunks[$_]}; + } + } (0..$#chunks); + } my ($mchunks) = min(scalar @pchunks, scalar @chunks); my ($i) = 0; $out .= " "; + if ($stop_at > 0) { + if ($found_block_first_time) { + map { $pchunks[$_] = "" } (2..$#pchunks); + $chunks[2] = "($chunks[2]"; + $chunks[3] = $valstr; + } else { + $chunks[2] = ""; + $chunks[3] = ""; + } + } + while ($i < $mchunks) { if ($i > 0) { $out .= '_'; } if ($chunks[$i] 
ne $pchunks[$i]) { - my ($plen) = length $pchunks[$i]; - my ($len) = length $chunks[$i]; - my ($maxc) = min($len, $plen); - my ($dup) = 0; - my ($tail) = 0; - while ($dup < $maxc) { - last if (substr($pchunks[$i], $dup, 1) ne - substr($chunks[$i], $dup, 1)); - $dup++; - } - while ($tail < $maxc - $dup) { - last if (substr($pchunks[$i], -1 - $tail, 1) ne - substr($chunks[$i], -1 - $tail, 1)); - $tail++; - } - my ($chunk) = $chunks[$i]; - if ($tail == 2) { - $chunk = substr($chunk, 0, -$tail) . '\\'; - } elsif ($tail > 2) { - $chunk = substr($chunk, 0, -$tail) . "/$tail"; - } - if ($dup == 2) { - $out .= '*' . substr($chunk, $dup); - } elsif ($dup > 2) { - $out .= "$dup+" . substr($chunk, $dup); + if ($comp_level >= 7) { + my ($plen) = length $pchunks[$i]; + my ($len) = length $chunks[$i]; + my ($maxc) = $len; + $maxc = $plen if $plen< $len; + my ($dup) = 0; + my ($chunk) = $chunks[$i]; + if (substr($pchunks[$i], 0, 1) ne '(') { + while ($dup < $maxc) { + last if (substr($pchunks[$i], $dup, 1) ne + substr($chunks[$i], $dup, 1)); + $dup++; + } + if (! $found_block_first_time) { + my ($tail) = 0; + while ($tail < $maxc - $dup) { + last if (substr($pchunks[$i], -1 - $tail, 1) ne + substr($chunks[$i], -1 - $tail, 1)); + $tail++; + } + if ($tail == 2) { + $chunk = substr($chunk, 0, -$tail) . '\\'; + } elsif ($tail > 2) { + $chunk = substr($chunk, 0, -$tail) . "/$tail"; + } + } + } + if ($dup == 2) { + $out .= '*' . substr($chunk, $dup); + } elsif ($dup > 2) { + if ($dup == $lastdup) { + $out .= "+" . substr($chunk, $dup); + } else { + $out .= "$dup+" . 
substr($chunk, $dup); + } + } else { + $out .= "$chunk"; + } + $lastdup = $dup; } else { - $out .= "$chunk"; + $out .= $chunks[$i]; } - } else { - $out .= ""; } $i++; } + $found_block_first_time = 0; + @pchunks = @chunks; if ($i < scalar @chunks) { if ($i > 0) { $out .= '_'; } $out .= join("_", @chunks[$i..$#chunks]); } - $out =~ s/________/=/g; - $out =~ s/_______/!/g; - $out =~ s/______/"/g; - $out =~ s/_____/,/g; - $out =~ s/____/'/g; - $out =~ s/___/%/g; - $out =~ s/__/>/g; - @pchunks = @chunks; + $idx++; + if ($blocksize > 0 && $idx == $stop_at) { + $out .= ")"; + $idx += ($blocksize * ($valcount - 1)); + $stop_at = -1; + } + } + $out =~ s/\(_/(/g; + if ($comp_level >= 7) { + $out =~ s/________/=/go; + $out =~ s/_______/!/go; + $out =~ s/______/"/go; + $out =~ s/_____/,/go; + $out =~ s/____/'/go; + $out =~ s/___/%/go; + $out =~ s/__/>/go; + $out =~ s/^([0-9a-f]+ )P;_BW_/$1./; + $out =~ s/^([0-9a-f]+ )P;_c;_/$1,/; + if ($comp_level >= 8) { + $out =~ s/(.) /$asc_table{$1}/g; + if ($comp_level >= 9) { + $out = psub($out); + } + } } + $out =~ s/\|$//; print "$out\n"; } |