1 files changed, 132 insertions, 88 deletions
diff --git a/scripts/dtrx b/scripts/dtrx
index 70e7965..f053989 100755
--- a/scripts/dtrx
+++ b/scripts/dtrx
@@ -2,8 +2,8 @@
 # -*- coding: utf-8 -*-
 #
 # dtrx -- Intelligently extract various archive types.
-# Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
-# Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
+# Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
+# Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
@@ -21,6 +21,7 @@
 # Python 2.3 string methods: 'rfind', 'rindex', 'rjust', 'rstrip'
 
 import errno
+import fcntl
 import logging
 import mimetypes
 import optparse
@@ -29,9 +30,12 @@ import re
 import shutil
 import signal
 import stat
+import string
+import struct
 import subprocess
 import sys
 import tempfile
+import termios
 import textwrap
 import traceback
 
@@ -40,10 +44,10 @@ try:
 except NameError:
     from sets import Set as set
 
-VERSION = "6.4"
+VERSION = "6.5"
 VERSION_BANNER = """dtrx version %s
-Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
-Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
+Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
+Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
 
 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
@@ -168,12 +172,21 @@ class BaseExtractor(object):
                 return index
         return None
 
+    def add_process(self, processes, command, stdin, stdout):
+        """Start command and append its Popen object to processes."""
+        try:
+            processes.append(subprocess.Popen(
+                command, stdin=stdin, stdout=stdout, stderr=self.stderr))
+        except OSError, error:
+            if error.errno != errno.ENOENT:  # only "no such program" is remapped
+                raise
+            raise ExtractorUnusable("could not run %s" % (command[0],))
+
     def run_pipes(self, final_stdout=None):
         if not self.pipes:
             return
         elif final_stdout is None:
-            # FIXME: Buffering this might be dumb.
-            final_stdout = tempfile.TemporaryFile()
+            final_stdout = open('/dev/null', 'w')
         num_pipes = len(self.pipes)
         last_pipe = num_pipes - 1
         processes = []
@@ -186,14 +199,7 @@ class BaseExtractor(object):
                 stdout = final_stdout
             else:
                 stdout = subprocess.PIPE
-            try:
-                processes.append(subprocess.Popen(command, stdin=stdin,
-                                                  stdout=stdout,
-                                                  stderr=self.stderr))
-            except OSError, error:
-                if error.errno == errno.ENOENT:
-                    raise ExtractorUnusable("could not run %s" % (command[0],))
-                raise
+            self.add_process(processes, command, stdin, stdout)
         self.exit_codes = [pipe.wait() for pipe in processes]
         self.archive.close()
         for index in range(last_pipe):
@@ -285,17 +291,25 @@ class BaseExtractor(object):
         self.archive.close()
         os.chdir(old_path)
 
-    def get_filenames(self):
-        self.pipe(self.list_pipe, "listing")
-        self.run_pipes()
-        self.check_success(False)
-        self.archive.seek(0, 0)
+    def get_filenames(self, internal=False):
+        if not internal:  # internal callers queue their own pipe first
+            self.pipe(self.list_pipe, "listing")
+        processes = []
+        stdin = self.archive
+        for command in [pipe[0] for pipe in self.pipes]:
+            self.add_process(processes, command, stdin, subprocess.PIPE)
+            stdin = processes[-1].stdout  # chain: next stage reads this one
+        get_output_line = processes[-1].stdout.readline
         while True:
-            line = self.archive.readline()
+            line = get_output_line()
             if not line:
-                self.archive.close()
-                return
+                break  # EOF on the last stage's output
             yield line.rstrip('\n')
+        self.exit_codes = [pipe.wait() for pipe in processes]
+        self.archive.close()
+        for process in processes:
+            process.stdout.close()
+        self.check_success(False)  # not reached if iteration stops early
     
 
 class CompressionExtractor(BaseExtractor):
@@ -377,11 +391,25 @@ class RPMExtractor(CpioExtractor):
 
 class DebExtractor(TarExtractor):
     file_type = 'Debian package'
+    data_re = re.compile(r'^data\.tar\.[a-z0-9]+$')
 
     def prepare(self):
-        self.pipe(['ar', 'p', self.filename, 'data.tar.gz'],
-                  "data.tar.gz extraction")
-        self.pipe(['zcat'], "data.tar.gz decompression")
+        self.pipe(['ar', 't', self.filename], "finding package data file")
+        for filename in self.get_filenames(internal=True):
+            if self.data_re.match(filename):
+                data_filename = filename
+                break
+        else:
+            raise ExtractorError(".deb contains no data.tar file")
+        self.archive.seek(0, 0)
+        # Remove the last queued pipe (presumably the 'ar t' lister above).
+        self.pipes.pop()
+        encoding = mimetypes.guess_type(data_filename)[1]
+        if encoding not in self.decoders:  # None, or no decoder for it
+            raise ExtractorError("data.tar file has unrecognized encoding")
+        self.pipe(['ar', 'p', self.filename, data_filename],
+                  "extracting data.tar from .deb")
+        self.pipe([self.decoders[encoding]], "decoding data.tar")
 
     def basename(self):
         pieces = os.path.basename(self.filename).split('_')
@@ -471,7 +499,7 @@ class SevenExtractor(NoPipeExtractor):
                 if fn_index is not None:
                     break
                 else:
-                    fn_index = line.rindex(' ') + 1
+                    fn_index = string.rindex(line, ' ') + 1
             elif fn_index is not None:
                 yield line[fn_index:]
         self.archive.close()
@@ -661,11 +689,16 @@ class BombHandler(BaseHandler):
         
 class BasePolicy(object):
     try:
-        width = int(os.environ['COLUMNS'])
-    except (KeyError, ValueError):
+        size = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ,
+                           struct.pack("HHHH", 0, 0, 0, 0))
+        width = struct.unpack("HHHH", size)[1]  # second field: columns
+    except IOError:
         width = 80
-    wrapper = textwrap.TextWrapper(width=width - 1)
-
+    width = max(width or 80, 2) - 1  # a reported 0 means "size unknown"
+    choice_wrapper = textwrap.TextWrapper(width=width, initial_indent=' * ',
+                                          subsequent_indent='   ',
+                                          break_long_words=False)
+    
     def __init__(self, options):
         self.current_policy = None
         if options.batch:
@@ -673,15 +706,10 @@ class BasePolicy(object):
         else:
             self.permanent_policy = None
 
-    def wrap(self, question, filename):
-        # Note: This function assumes the filename is the first thing in the
-        # question text, and that's the only place it appears.
-        if len(self.wrapper.wrap(filename + ' a')) > 1:
-            return [filename] + self.wrapper.wrap(question[3:])
-        return self.wrapper.wrap(question % (filename,))
-
     def ask_question(self, question):
-        question = question + self.choices
+        question = question + ["You can:"]
+        for choice in self.choices:
+            question.extend(self.choice_wrapper.wrap(choice))
         while True:
             print "\n".join(question)
             try:
@@ -693,6 +721,19 @@ class BasePolicy(object):
             except KeyError:
                 print
 
+    def wrap(self, question, *args):
+        """Replace each standalone '%s' word in question with the next arg,
+        then return the words packed greedily into lines for self.width."""
+        pending, result = list(args), []
+        for word in question.split():
+            if word == '%s' and pending:
+                word = '%s' % (pending.pop(0),)  # also stringifies counts
+            if result and len(result[-1]) + 1 + len(word) <= self.width:
+                result[-1] = '%s %s' % (result[-1], word)
+            else:
+                result.append(word)  # new line; a long word may overflow
+        return result
+
     def __cmp__(self, other):
         return cmp(self.current_policy, other)
     
@@ -700,10 +741,9 @@ class BasePolicy(object):
 class OneEntryPolicy(BasePolicy):
     answers = {'h': EXTRACT_HERE, 'i': EXTRACT_WRAP, 'r': EXTRACT_RENAME,
                '': EXTRACT_WRAP}
-    choices = ["You can:",
-               " * extract it Inside another directory",
-               " * extract it and Rename the directory",
-               " * extract it Here"]
+    choice_template = ["extract the %s _I_nside a new directory named %s",
+                       "extract the %s and _R_ename it %s",
+                       "extract the %s _H_ere"]
     prompt = "What do you want to do?  (I/r/h) "
 
     def __init__(self, options):
@@ -724,11 +764,14 @@ class OneEntryPolicy(BasePolicy):
             raise ValueError("bad value %s for default policy" % (default,))
 
     def prep(self, archive_filename, extractor):
-        question = self.wrap(("%%s contains one %s, but its name " +
-                              "doesn't match.") %
-                             (extractor.content_type,), archive_filename)
+        question = self.wrap(
+            "%s contains one %s but its name doesn't match.",
+            archive_filename, extractor.content_type)
         question.append(" Expected: " + extractor.basename())
         question.append("   Actual: " + extractor.content_name)
+        choice_vars = (extractor.content_type, extractor.basename())
+        self.choices = [text % choice_vars[:text.count('%s')]
+                        for text in self.choice_template]
         self.current_policy = (self.permanent_policy or
                                self.ask_question(question))
 
@@ -739,12 +782,11 @@ class OneEntryPolicy(BasePolicy):
 class RecursionPolicy(BasePolicy):
     answers = {'o': RECURSE_ONCE, 'a': RECURSE_ALWAYS, 'n': RECURSE_NOT_NOW,
                'v': RECURSE_NEVER, 'l': RECURSE_LIST, '': RECURSE_NOT_NOW}
-    choices = ["You can:",
-               " * Always extract included archives",
-               " * extract included archives this Once",
-               " * choose Not to extract included archives",
-               " * neVer extract included archives",
-               " * List included archives"]
+    choices = ["_A_lways extract included archives during this session",
+               "extract included archives this _O_nce",
+               "choose _N_ot to extract included archives this once",
+               "ne_V_er extract included archives during this session",
+               "_L_ist included archives"]
     prompt = "What do you want to do?  (a/o/N/v/l) "
 
     def __init__(self, options):
@@ -759,10 +801,9 @@ class RecursionPolicy(BasePolicy):
         if (self.permanent_policy is not None) or (archive_count == 0):
             self.current_policy = self.permanent_policy or RECURSE_NOT_NOW
             return
-        question = self.wrap(("%%s contains %s other archive file(s), " +
-                              "out of %s file(s) total.") %
-                             (archive_count, extractor.file_count),
-                             current_filename)
+        question = self.wrap(
+            "%s contains %s other archive file(s), out of %s file(s) total.",
+            current_filename, archive_count, extractor.file_count)
         if target == '.':
             target = ''
         included_root = extractor.included_root
@@ -840,8 +881,10 @@ class ExtractorBuilder(object):
         for extension in ext_info.get('extensions', ()):
             extension_map.setdefault(extension, []).append((ext_name, None))
 
-    for mapping in (('tar', 'bzip2', 'tar.bz2'),
+    for mapping in (('tar', 'bzip2', 'tar.bz2', 'tbz2', 'tb2', 'tbz'),
                     ('tar', 'gzip', 'tar.gz', 'tgz'),
+                    ('tar', 'lzma', 'tar.lzma', 'tlz'),
+                    ('tar', 'compress', 'tar.Z', 'taz'),
                     ('compress', 'gzip', 'Z', 'gz'),
                     ('compress', 'bzip2', 'bz2'),
                     ('compress', 'lzma', 'lzma')):
@@ -936,6 +979,7 @@ class BaseAction(object):
         self.options = options
         self.filenames = filenames
         self.target = None
+        self.do_print = False
         
     def report(self, function, *args):
         try:
@@ -945,15 +989,20 @@ class BaseAction(object):
             logger.debug(''.join(traceback.format_exception(*sys.exc_info())))
         return error
 
+    def show_filename(self, filename):
+        """Print a "name:" header, but only when handling several files."""
+        if len(self.filenames) < 2:
+            return
+        if self.do_print:
+            print  # blank line between this header and earlier output
+        self.do_print = True
+        print "%s:" % (filename,)
+
 
 class ExtractionAction(BaseAction):
     handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
                 BombHandler]
 
-    def __init__(self, options, filenames):
-        BaseAction.__init__(self, options, filenames)
-        self.did_print = False
-
     def get_handler(self, extractor):
         if extractor.content_type in ONE_ENTRY_UNKNOWN:
             self.options.one_entry_policy.prep(self.current_filename,
@@ -967,11 +1016,7 @@ class ExtractionAction(BaseAction):
     def show_extraction(self, extractor):
         if self.options.log_level > logging.INFO:
             return
-        elif self.did_print:
-            print
-        else:
-            self.did_print = True
-        print "%s:" % (self.current_filename,)
+        self.show_filename(self.current_filename)
         if extractor.contents is None:
             print self.current_handler.target
             return
@@ -1007,29 +1052,28 @@ class ExtractionAction(BaseAction):
 
 
 class ListAction(BaseAction):
-    def __init__(self, options, filenames):
-        BaseAction.__init__(self, options, filenames)
-        self.count = 0
-
-    def get_list(self, extractor):
-        # Note: The reason I'm getting all the filenames up front is
-        # because if we run into trouble partway through the archive, we'll
-        # try another extractor.  So before we display anything we have to
-        # be sure this one is successful.  We maybe don't have to be quite
-        # this conservative but this is the easy way out for now.
-        self.filelist = list(extractor.get_filenames())
-
-    def show_list(self, filename):
-        self.count += 1
-        if len(self.filenames) != 1:
-            if self.count > 1:
-                print
-            print "%s:" % (filename,)
-        print '\n'.join(self.filelist)
-
+    def list_filenames(self, extractor, filename):
+        # Pull one line before printing the filename header, so a lister
+        # that fails right away raises before anything has been shown.
+        filename_lister = extractor.get_filenames()
+        try:
+            first_line = filename_lister.next()
+        except StopIteration:  # nothing listed: print the header only
+            self.show_filename(filename)
+        else:
+            self.did_list = True  # lets run() disown a partial listing
+            self.show_filename(filename)
+            print first_line
+        for line in filename_lister:
+            print line
+            
     def run(self, filename, extractor):
-        return (self.report(self.get_list, extractor) or
-                self.report(self.show_list, filename))
+        self.did_list = False  # set by list_filenames once output begins
+        error = self.report(self.list_filenames, extractor, filename)
+        if error and self.did_list:
+            logger.error("lister failed: ignore above listing for %s" %
+                         (filename,))
+        return error
 
 
 class ExtractorApplication(object):