diff options
Diffstat (limited to 'scripts/dtrx')
-rwxr-xr-x | scripts/dtrx | 220 |
1 files changed, 132 insertions, 88 deletions
diff --git a/scripts/dtrx b/scripts/dtrx index 70e7965..f053989 100755 --- a/scripts/dtrx +++ b/scripts/dtrx @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- # # dtrx -- Intelligently extract various archive types. -# Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org> -# Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com> +# Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org> +# Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com> # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -21,6 +21,7 @@ # Python 2.3 string methods: 'rfind', 'rindex', 'rjust', 'rstrip' import errno +import fcntl import logging import mimetypes import optparse @@ -29,9 +30,12 @@ import re import shutil import signal import stat +import string +import struct import subprocess import sys import tempfile +import termios import textwrap import traceback @@ -40,10 +44,10 @@ try: except NameError: from sets import Set as set -VERSION = "6.4" +VERSION = "6.5" VERSION_BANNER = """dtrx version %s -Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org> -Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com> +Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org> +Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -168,12 +172,21 @@ class BaseExtractor(object): return index return None + def add_process(self, processes, command, stdin, stdout): + try: + processes.append(subprocess.Popen(command, stdin=stdin, + stdout=stdout, + stderr=self.stderr)) + except OSError, error: + if error.errno == errno.ENOENT: + raise ExtractorUnusable("could not run %s" % (command[0],)) + raise + def run_pipes(self, final_stdout=None): if not self.pipes: return elif final_stdout is None: - # FIXME: Buffering this might be dumb. - final_stdout = tempfile.TemporaryFile() + final_stdout = open('/dev/null', 'w') num_pipes = len(self.pipes) last_pipe = num_pipes - 1 processes = [] @@ -186,14 +199,7 @@ class BaseExtractor(object): stdout = final_stdout else: stdout = subprocess.PIPE - try: - processes.append(subprocess.Popen(command, stdin=stdin, - stdout=stdout, - stderr=self.stderr)) - except OSError, error: - if error.errno == errno.ENOENT: - raise ExtractorUnusable("could not run %s" % (command[0],)) - raise + self.add_process(processes, command, stdin, stdout) self.exit_codes = [pipe.wait() for pipe in processes] self.archive.close() for index in range(last_pipe): @@ -285,17 +291,25 @@ class BaseExtractor(object): self.archive.close() os.chdir(old_path) - def get_filenames(self): - self.pipe(self.list_pipe, "listing") - self.run_pipes() - self.check_success(False) - self.archive.seek(0, 0) + def get_filenames(self, internal=False): + if not internal: + self.pipe(self.list_pipe, "listing") + processes = [] + stdin = self.archive + for command in [pipe[0] for pipe in self.pipes]: + self.add_process(processes, command, stdin, subprocess.PIPE) + stdin = processes[-1].stdout + get_output_line = processes[-1].stdout.readline while True: - line = self.archive.readline() + line = get_output_line() if not line: - self.archive.close() - return + break yield line.rstrip('\n') + self.exit_codes = [pipe.wait() for pipe in processes] + self.archive.close() + for process in processes: + process.stdout.close() + self.check_success(False) class CompressionExtractor(BaseExtractor): @@ -377,11 +391,25 @@ class RPMExtractor(CpioExtractor): class DebExtractor(TarExtractor): file_type = 'Debian package' + data_re = re.compile(r'^data\.tar\.[a-z0-9]+$') def prepare(self): - self.pipe(['ar', 'p', self.filename, 'data.tar.gz'], - "data.tar.gz extraction") - self.pipe(['zcat'], "data.tar.gz decompression") + self.pipe(['ar', 't', self.filename], "finding package data file") + for filename in self.get_filenames(internal=True): + if self.data_re.match(filename): + data_filename = filename + break + else: + raise ExtractorError(".deb contains no data.tar file") + self.archive.seek(0, 0) + self.pipes.pop() + # self.pipes = start_pipes + encoding = mimetypes.guess_type(data_filename)[1] + if not encoding: + raise ExtractorError("data.tar file has unrecognized encoding") + self.pipe(['ar', 'p', self.filename, data_filename], + "extracting data.tar from .deb") + self.pipe([self.decoders[encoding]], "decoding data.tar") def basename(self): pieces = os.path.basename(self.filename).split('_') @@ -471,7 +499,7 @@ class SevenExtractor(NoPipeExtractor): if fn_index is not None: break else: - fn_index = line.rindex(' ') + 1 + fn_index = string.rindex(line, ' ') + 1 elif fn_index is not None: yield line[fn_index:] self.archive.close() @@ -661,11 +689,16 @@ class BombHandler(BaseHandler): class BasePolicy(object): try: - width = int(os.environ['COLUMNS']) - except (KeyError, ValueError): + size = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ, + struct.pack("HHHH", 0, 0, 0, 0)) + width = struct.unpack("HHHH", size)[1] + except IOError: width = 80 - wrapper = textwrap.TextWrapper(width=width - 1) - + width = width - 1 + choice_wrapper = textwrap.TextWrapper(width=width, initial_indent=' * ', + subsequent_indent=' ', + break_long_words=False) + def __init__(self, options): self.current_policy = None if options.batch: @@ -673,15 +706,10 @@ class BasePolicy(object): else: self.permanent_policy = None - def wrap(self, question, filename): - # Note: This function assumes the filename is the first thing in the - # question text, and that's the only place it appears. - if len(self.wrapper.wrap(filename + ' a')) > 1: - return [filename] + self.wrapper.wrap(question[3:]) - return self.wrapper.wrap(question % (filename,)) - def ask_question(self, question): - question = question + self.choices + question = question + ["You can:"] + for choice in self.choices: + question.extend(self.choice_wrapper.wrap(choice)) while True: print "\n".join(question) try: @@ -693,6 +721,19 @@ class BasePolicy(object): except KeyError: print + def wrap(self, question, *args): + words = question.split() + for arg in args: + words[words.index('%s')] = arg + result = [words.pop(0)] + for word in words: + extend = '%s %s' % (result[-1], word) + if len(extend) > self.width: + result.append(word) + else: + result[-1] = extend + return result + def __cmp__(self, other): return cmp(self.current_policy, other) @@ -700,10 +741,9 @@ class BasePolicy(object): class OneEntryPolicy(BasePolicy): answers = {'h': EXTRACT_HERE, 'i': EXTRACT_WRAP, 'r': EXTRACT_RENAME, '': EXTRACT_WRAP} - choices = ["You can:", - " * extract it Inside another directory", - " * extract it and Rename the directory", - " * extract it Here"] + choice_template = ["extract the %s _I_nside a new directory named %s", + "extract the %s and _R_ename it %s", + "extract the %s _H_ere"] prompt = "What do you want to do? (I/r/h) " def __init__(self, options): @@ -724,11 +764,14 @@ class OneEntryPolicy(BasePolicy): raise ValueError("bad value %s for default policy" % (default,)) def prep(self, archive_filename, extractor): - question = self.wrap(("%%s contains one %s, but its name " + - "doesn't match.") % - (extractor.content_type,), archive_filename) + question = self.wrap( + "%s contains one %s but its name doesn't match.", + archive_filename, extractor.content_type) question.append(" Expected: " + extractor.basename()) question.append(" Actual: " + extractor.content_name) + choice_vars = (extractor.content_type, extractor.basename()) + self.choices = [text % choice_vars[:text.count('%s')] + for text in self.choice_template] self.current_policy = (self.permanent_policy or self.ask_question(question)) @@ -739,12 +782,11 @@ class OneEntryPolicy(BasePolicy): class RecursionPolicy(BasePolicy): answers = {'o': RECURSE_ONCE, 'a': RECURSE_ALWAYS, 'n': RECURSE_NOT_NOW, 'v': RECURSE_NEVER, 'l': RECURSE_LIST, '': RECURSE_NOT_NOW} - choices = ["You can:", - " * Always extract included archives", - " * extract included archives this Once", - " * choose Not to extract included archives", - " * neVer extract included archives", - " * List included archives"] + choices = ["_A_lways extract included archives during this session", + "extract included archives this _O_nce", + "choose _N_ot to extract included archives this once", + "ne_V_er extract included archives during this session", + "_L_ist included archives"] prompt = "What do you want to do? (a/o/N/v/l) " def __init__(self, options): @@ -759,10 +801,9 @@ class RecursionPolicy(BasePolicy): if (self.permanent_policy is not None) or (archive_count == 0): self.current_policy = self.permanent_policy or RECURSE_NOT_NOW return - question = self.wrap(("%%s contains %s other archive file(s), " + - "out of %s file(s) total.") % - (archive_count, extractor.file_count), - current_filename) + question = self.wrap( + "%s contains %s other archive file(s), out of %s file(s) total.", + current_filename, archive_count, extractor.file_count) if target == '.': target = '' included_root = extractor.included_root @@ -840,8 +881,10 @@ class ExtractorBuilder(object): for extension in ext_info.get('extensions', ()): extension_map.setdefault(extension, []).append((ext_name, None)) - for mapping in (('tar', 'bzip2', 'tar.bz2'), + for mapping in (('tar', 'bzip2', 'tar.bz2', 'tbz2', 'tb2', 'tbz'), ('tar', 'gzip', 'tar.gz', 'tgz'), + ('tar', 'lzma', 'tar.lzma', 'tlz'), + ('tar', 'compress', 'tar.Z', 'taz'), ('compress', 'gzip', 'Z', 'gz'), ('compress', 'bzip2', 'bz2'), ('compress', 'lzma', 'lzma')): @@ -936,6 +979,7 @@ class BaseAction(object): self.options = options self.filenames = filenames self.target = None + self.do_print = False def report(self, function, *args): try: @@ -945,15 +989,20 @@ class BaseAction(object): logger.debug(''.join(traceback.format_exception(*sys.exc_info()))) return error + def show_filename(self, filename): + if len(self.filenames) < 2: + return + elif self.do_print: + print + else: + self.do_print = True + print "%s:" % (filename,) + class ExtractionAction(BaseAction): handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler, BombHandler] - def __init__(self, options, filenames): - BaseAction.__init__(self, options, filenames) - self.did_print = False - def get_handler(self, extractor): if extractor.content_type in ONE_ENTRY_UNKNOWN: self.options.one_entry_policy.prep(self.current_filename, @@ -967,11 +1016,7 @@ class ExtractionAction(BaseAction): def show_extraction(self, extractor): if self.options.log_level > logging.INFO: return - elif self.did_print: - print - else: - self.did_print = True - print "%s:" % (self.current_filename,) + self.show_filename(self.current_filename) if extractor.contents is None: print self.current_handler.target return @@ -1007,29 +1052,28 @@ class ExtractionAction(BaseAction): class ListAction(BaseAction): - def __init__(self, options, filenames): - BaseAction.__init__(self, options, filenames) - self.count = 0 - - def get_list(self, extractor): - # Note: The reason I'm getting all the filenames up front is - # because if we run into trouble partway through the archive, we'll - # try another extractor. So before we display anything we have to - # be sure this one is successful. We maybe don't have to be quite - # this conservative but this is the easy way out for now. - self.filelist = list(extractor.get_filenames()) - - def show_list(self, filename): - self.count += 1 - if len(self.filenames) != 1: - if self.count > 1: - print - print "%s:" % (filename,) - print '\n'.join(self.filelist) - + def list_filenames(self, extractor, filename): + # We get a line first to make sure there's not going to be some + # basic error before we show what filename we're listing. + filename_lister = extractor.get_filenames() + try: + first_line = filename_lister.next() + except StopIteration: + self.show_filename(filename) + else: + self.did_list = True + self.show_filename(filename) + print first_line + for line in filename_lister: + print line + def run(self, filename, extractor): - return (self.report(self.get_list, extractor) or - self.report(self.show_list, filename)) + self.did_list = False + error = self.report(self.list_filenames, extractor, filename) + if error and self.did_list: + logger.error("lister failed: ignore above listing for %s" % + (filename,)) + return error class ExtractorApplication(object): |