summary | refs | log | tree | commit | diff
path: root/scripts/dtrx
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/dtrx')
-rwxr-xr-x scripts/dtrx 220
1 files changed, 132 insertions, 88 deletions
diff --git a/scripts/dtrx b/scripts/dtrx
index 70e7965..f053989 100755
--- a/scripts/dtrx
+++ b/scripts/dtrx
@@ -2,8 +2,8 @@
# -*- coding: utf-8 -*-
#
# dtrx -- Intelligently extract various archive types.
-# Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
-# Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
+# Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
+# Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
@@ -21,6 +21,7 @@
# Python 2.3 string methods: 'rfind', 'rindex', 'rjust', 'rstrip'
import errno
+import fcntl
import logging
import mimetypes
import optparse
@@ -29,9 +30,12 @@ import re
import shutil
import signal
import stat
+import string
+import struct
import subprocess
import sys
import tempfile
+import termios
import textwrap
import traceback
@@ -40,10 +44,10 @@ try:
except NameError:
from sets import Set as set
-VERSION = "6.4"
+VERSION = "6.5"
VERSION_BANNER = """dtrx version %s
-Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
-Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
+Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
+Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -168,12 +172,21 @@ class BaseExtractor(object):
return index
return None
+ def add_process(self, processes, command, stdin, stdout):
+ try:
+ processes.append(subprocess.Popen(command, stdin=stdin,
+ stdout=stdout,
+ stderr=self.stderr))
+ except OSError, error:
+ if error.errno == errno.ENOENT:
+ raise ExtractorUnusable("could not run %s" % (command[0],))
+ raise
+
def run_pipes(self, final_stdout=None):
if not self.pipes:
return
elif final_stdout is None:
- # FIXME: Buffering this might be dumb.
- final_stdout = tempfile.TemporaryFile()
+ final_stdout = open('/dev/null', 'w')
num_pipes = len(self.pipes)
last_pipe = num_pipes - 1
processes = []
@@ -186,14 +199,7 @@ class BaseExtractor(object):
stdout = final_stdout
else:
stdout = subprocess.PIPE
- try:
- processes.append(subprocess.Popen(command, stdin=stdin,
- stdout=stdout,
- stderr=self.stderr))
- except OSError, error:
- if error.errno == errno.ENOENT:
- raise ExtractorUnusable("could not run %s" % (command[0],))
- raise
+ self.add_process(processes, command, stdin, stdout)
self.exit_codes = [pipe.wait() for pipe in processes]
self.archive.close()
for index in range(last_pipe):
@@ -285,17 +291,25 @@ class BaseExtractor(object):
self.archive.close()
os.chdir(old_path)
- def get_filenames(self):
- self.pipe(self.list_pipe, "listing")
- self.run_pipes()
- self.check_success(False)
- self.archive.seek(0, 0)
+ def get_filenames(self, internal=False):
+ if not internal:
+ self.pipe(self.list_pipe, "listing")
+ processes = []
+ stdin = self.archive
+ for command in [pipe[0] for pipe in self.pipes]:
+ self.add_process(processes, command, stdin, subprocess.PIPE)
+ stdin = processes[-1].stdout
+ get_output_line = processes[-1].stdout.readline
while True:
- line = self.archive.readline()
+ line = get_output_line()
if not line:
- self.archive.close()
- return
+ break
yield line.rstrip('\n')
+ self.exit_codes = [pipe.wait() for pipe in processes]
+ self.archive.close()
+ for process in processes:
+ process.stdout.close()
+ self.check_success(False)
class CompressionExtractor(BaseExtractor):
@@ -377,11 +391,25 @@ class RPMExtractor(CpioExtractor):
class DebExtractor(TarExtractor):
file_type = 'Debian package'
+ data_re = re.compile(r'^data\.tar\.[a-z0-9]+$')
def prepare(self):
- self.pipe(['ar', 'p', self.filename, 'data.tar.gz'],
- "data.tar.gz extraction")
- self.pipe(['zcat'], "data.tar.gz decompression")
+ self.pipe(['ar', 't', self.filename], "finding package data file")
+ for filename in self.get_filenames(internal=True):
+ if self.data_re.match(filename):
+ data_filename = filename
+ break
+ else:
+ raise ExtractorError(".deb contains no data.tar file")
+ self.archive.seek(0, 0)
+ self.pipes.pop()
+ # self.pipes = start_pipes
+ encoding = mimetypes.guess_type(data_filename)[1]
+ if not encoding:
+ raise ExtractorError("data.tar file has unrecognized encoding")
+ self.pipe(['ar', 'p', self.filename, data_filename],
+ "extracting data.tar from .deb")
+ self.pipe([self.decoders[encoding]], "decoding data.tar")
def basename(self):
pieces = os.path.basename(self.filename).split('_')
@@ -471,7 +499,7 @@ class SevenExtractor(NoPipeExtractor):
if fn_index is not None:
break
else:
- fn_index = line.rindex(' ') + 1
+ fn_index = string.rindex(line, ' ') + 1
elif fn_index is not None:
yield line[fn_index:]
self.archive.close()
@@ -661,11 +689,16 @@ class BombHandler(BaseHandler):
class BasePolicy(object):
try:
- width = int(os.environ['COLUMNS'])
- except (KeyError, ValueError):
+ size = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ,
+ struct.pack("HHHH", 0, 0, 0, 0))
+ width = struct.unpack("HHHH", size)[1]
+ except IOError:
width = 80
- wrapper = textwrap.TextWrapper(width=width - 1)
-
+ width = width - 1
+ choice_wrapper = textwrap.TextWrapper(width=width, initial_indent=' * ',
+ subsequent_indent=' ',
+ break_long_words=False)
+
def __init__(self, options):
self.current_policy = None
if options.batch:
@@ -673,15 +706,10 @@ class BasePolicy(object):
else:
self.permanent_policy = None
- def wrap(self, question, filename):
- # Note: This function assumes the filename is the first thing in the
- # question text, and that's the only place it appears.
- if len(self.wrapper.wrap(filename + ' a')) > 1:
- return [filename] + self.wrapper.wrap(question[3:])
- return self.wrapper.wrap(question % (filename,))
-
def ask_question(self, question):
- question = question + self.choices
+ question = question + ["You can:"]
+ for choice in self.choices:
+ question.extend(self.choice_wrapper.wrap(choice))
while True:
print "\n".join(question)
try:
@@ -693,6 +721,19 @@ class BasePolicy(object):
except KeyError:
print
+ def wrap(self, question, *args):
+ words = question.split()
+ for arg in args:
+ words[words.index('%s')] = arg
+ result = [words.pop(0)]
+ for word in words:
+ extend = '%s %s' % (result[-1], word)
+ if len(extend) > self.width:
+ result.append(word)
+ else:
+ result[-1] = extend
+ return result
+
def __cmp__(self, other):
return cmp(self.current_policy, other)
@@ -700,10 +741,9 @@ class BasePolicy(object):
class OneEntryPolicy(BasePolicy):
answers = {'h': EXTRACT_HERE, 'i': EXTRACT_WRAP, 'r': EXTRACT_RENAME,
'': EXTRACT_WRAP}
- choices = ["You can:",
- " * extract it Inside another directory",
- " * extract it and Rename the directory",
- " * extract it Here"]
+ choice_template = ["extract the %s _I_nside a new directory named %s",
+ "extract the %s and _R_ename it %s",
+ "extract the %s _H_ere"]
prompt = "What do you want to do? (I/r/h) "
def __init__(self, options):
@@ -724,11 +764,14 @@ class OneEntryPolicy(BasePolicy):
raise ValueError("bad value %s for default policy" % (default,))
def prep(self, archive_filename, extractor):
- question = self.wrap(("%%s contains one %s, but its name " +
- "doesn't match.") %
- (extractor.content_type,), archive_filename)
+ question = self.wrap(
+ "%s contains one %s but its name doesn't match.",
+ archive_filename, extractor.content_type)
question.append(" Expected: " + extractor.basename())
question.append(" Actual: " + extractor.content_name)
+ choice_vars = (extractor.content_type, extractor.basename())
+ self.choices = [text % choice_vars[:text.count('%s')]
+ for text in self.choice_template]
self.current_policy = (self.permanent_policy or
self.ask_question(question))
@@ -739,12 +782,11 @@ class OneEntryPolicy(BasePolicy):
class RecursionPolicy(BasePolicy):
answers = {'o': RECURSE_ONCE, 'a': RECURSE_ALWAYS, 'n': RECURSE_NOT_NOW,
'v': RECURSE_NEVER, 'l': RECURSE_LIST, '': RECURSE_NOT_NOW}
- choices = ["You can:",
- " * Always extract included archives",
- " * extract included archives this Once",
- " * choose Not to extract included archives",
- " * neVer extract included archives",
- " * List included archives"]
+ choices = ["_A_lways extract included archives during this session",
+ "extract included archives this _O_nce",
+ "choose _N_ot to extract included archives this once",
+ "ne_V_er extract included archives during this session",
+ "_L_ist included archives"]
prompt = "What do you want to do? (a/o/N/v/l) "
def __init__(self, options):
@@ -759,10 +801,9 @@ class RecursionPolicy(BasePolicy):
if (self.permanent_policy is not None) or (archive_count == 0):
self.current_policy = self.permanent_policy or RECURSE_NOT_NOW
return
- question = self.wrap(("%%s contains %s other archive file(s), " +
- "out of %s file(s) total.") %
- (archive_count, extractor.file_count),
- current_filename)
+ question = self.wrap(
+ "%s contains %s other archive file(s), out of %s file(s) total.",
+ current_filename, archive_count, extractor.file_count)
if target == '.':
target = ''
included_root = extractor.included_root
@@ -840,8 +881,10 @@ class ExtractorBuilder(object):
for extension in ext_info.get('extensions', ()):
extension_map.setdefault(extension, []).append((ext_name, None))
- for mapping in (('tar', 'bzip2', 'tar.bz2'),
+ for mapping in (('tar', 'bzip2', 'tar.bz2', 'tbz2', 'tb2', 'tbz'),
('tar', 'gzip', 'tar.gz', 'tgz'),
+ ('tar', 'lzma', 'tar.lzma', 'tlz'),
+ ('tar', 'compress', 'tar.Z', 'taz'),
('compress', 'gzip', 'Z', 'gz'),
('compress', 'bzip2', 'bz2'),
('compress', 'lzma', 'lzma')):
@@ -936,6 +979,7 @@ class BaseAction(object):
self.options = options
self.filenames = filenames
self.target = None
+ self.do_print = False
def report(self, function, *args):
try:
@@ -945,15 +989,20 @@ class BaseAction(object):
logger.debug(''.join(traceback.format_exception(*sys.exc_info())))
return error
+ def show_filename(self, filename):
+ if len(self.filenames) < 2:
+ return
+ elif self.do_print:
+ print
+ else:
+ self.do_print = True
+ print "%s:" % (filename,)
+
class ExtractionAction(BaseAction):
handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
BombHandler]
- def __init__(self, options, filenames):
- BaseAction.__init__(self, options, filenames)
- self.did_print = False
-
def get_handler(self, extractor):
if extractor.content_type in ONE_ENTRY_UNKNOWN:
self.options.one_entry_policy.prep(self.current_filename,
@@ -967,11 +1016,7 @@ class ExtractionAction(BaseAction):
def show_extraction(self, extractor):
if self.options.log_level > logging.INFO:
return
- elif self.did_print:
- print
- else:
- self.did_print = True
- print "%s:" % (self.current_filename,)
+ self.show_filename(self.current_filename)
if extractor.contents is None:
print self.current_handler.target
return
@@ -1007,29 +1052,28 @@ class ExtractionAction(BaseAction):
class ListAction(BaseAction):
- def __init__(self, options, filenames):
- BaseAction.__init__(self, options, filenames)
- self.count = 0
-
- def get_list(self, extractor):
- # Note: The reason I'm getting all the filenames up front is
- # because if we run into trouble partway through the archive, we'll
- # try another extractor. So before we display anything we have to
- # be sure this one is successful. We maybe don't have to be quite
- # this conservative but this is the easy way out for now.
- self.filelist = list(extractor.get_filenames())
-
- def show_list(self, filename):
- self.count += 1
- if len(self.filenames) != 1:
- if self.count > 1:
- print
- print "%s:" % (filename,)
- print '\n'.join(self.filelist)
-
+ def list_filenames(self, extractor, filename):
+ # We get a line first to make sure there's not going to be some
+ # basic error before we show what filename we're listing.
+ filename_lister = extractor.get_filenames()
+ try:
+ first_line = filename_lister.next()
+ except StopIteration:
+ self.show_filename(filename)
+ else:
+ self.did_list = True
+ self.show_filename(filename)
+ print first_line
+ for line in filename_lister:
+ print line
+
def run(self, filename, extractor):
- return (self.report(self.get_list, extractor) or
- self.report(self.show_list, filename))
+ self.did_list = False
+ error = self.report(self.list_filenames, extractor, filename)
+ if error and self.did_list:
+ logger.error("lister failed: ignore above listing for %s" %
+ (filename,))
+ return error
class ExtractorApplication(object):