summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/dtrx 170
1 files changed, 129 insertions, 41 deletions
diff --git a/scripts/dtrx b/scripts/dtrx
index 7a98ba5..2fc99e3 100755
--- a/scripts/dtrx
+++ b/scripts/dtrx
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# dtrx -- Intelligently extract various archive types.
-# Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
+# Copyright © 2006-2011 Brett Smith <brettcsmith@brettcsmith.org>
# Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
#
# This program is free software; you can redistribute it and/or modify it
@@ -38,15 +38,16 @@ import tempfile
import termios
import textwrap
import traceback
+import urlparse
try:
set
except NameError:
from sets import Set as set
-VERSION = "6.6"
+VERSION = "7.1"
VERSION_BANNER = """dtrx version %s
-Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
+Copyright © 2006-2011 Brett Smith <brettcsmith@brettcsmith.org>
Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
This program is free software; you can redistribute it and/or modify it
@@ -81,6 +82,7 @@ RECURSE_LIST = 5
mimetypes.encodings_map.setdefault('.bz2', 'bzip2')
mimetypes.encodings_map.setdefault('.lzma', 'lzma')
mimetypes.encodings_map.setdefault('.xz', 'xz')
+mimetypes.encodings_map.setdefault('.lz', 'lzip')
mimetypes.types_map.setdefault('.gem', 'application/x-ruby-gem')
logger = logging.getLogger('dtrx-log')
@@ -138,8 +140,8 @@ class ExtractorUnusable(Exception):
EXTRACTION_ERRORS = (ExtractorError, ExtractorUnusable, OSError, IOError)
class BaseExtractor(object):
- decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat',
- 'lzma': 'lzcat', 'xz': 'xzcat'}
+ decoders = {'bzip2': ['bzcat'], 'gzip': ['zcat'], 'compress': ['zcat'],
+ 'lzma': ['lzcat'], 'xz': ['xzcat'], 'lzip': ['lzip', '-cd']}
name_checker = DirectoryChecker
def __init__(self, filename, encoding):
@@ -161,18 +163,12 @@ class BaseExtractor(object):
raise ExtractorError("could not open %s: %s" %
(filename, error.strerror))
if encoding:
- self.pipe([self.decoders[encoding]], "decoding")
+ self.pipe(self.decoders[encoding], "decoding")
self.prepare()
def pipe(self, command, description="extraction"):
self.pipes.append((command, description))
- def first_bad_exit_code(self):
- for index, code in enumerate(self.exit_codes):
- if code != 0:
- return index
- return None
-
def add_process(self, processes, command, stdin, stdout):
try:
processes.append(subprocess.Popen(command, stdin=stdin,
@@ -243,7 +239,16 @@ class BaseExtractor(object):
def basename(self):
pieces = os.path.basename(self.filename).split('.')
+ orig_len = len(pieces)
extension = '.' + pieces[-1]
+ # This is maybe a little more clever than it ought to be.
+ # We're trying to be conservative about what we remove, but also DTRT
+ # in cases like .tar.gz, and also do something reasonable if we
+ # encounter some completely off-the-wall extension. So that means:
+ # 1. First remove any compression extension.
+ # 2. Then remove any commonly known extension that remains.
+ # 3. If neither of those did anything, remove anything that looks
+ # like it's almost certainly an extension (less than 5 chars).
if mimetypes.encodings_map.has_key(extension):
pieces.pop()
extension = '.' + pieces[-1]
@@ -251,6 +256,9 @@ class BaseExtractor(object):
mimetypes.common_types.has_key(extension) or
mimetypes.suffix_map.has_key(extension)):
pieces.pop()
+ if ((orig_len == len(pieces)) and
+ (orig_len > 1) and (len(pieces[-1]) < 5)):
+ pieces.pop()
return '.'.join(pieces)
def get_stderr(self):
@@ -259,13 +267,25 @@ class BaseExtractor(object):
self.stderr.close()
return errors
- def check_success(self, got_output):
- error_index = self.first_bad_exit_code()
- if (not got_output) and (error_index is not None):
+ def is_fatal_error(self, status):
+ return False
+
+ def first_bad_exit_code(self):
+ for index, code in enumerate(self.exit_codes):
+ if code > 0:
+ return index, code
+ return None, None
+
+ def check_success(self, got_files):
+ error_index, error_code = self.first_bad_exit_code()
+ logger.debug("success results: %s %s %s" % (got_files, error_index,
+ self.exit_codes))
+ if (self.is_fatal_error(error_code) or
+ ((not got_files) and (error_code is not None))):
command = ' '.join(self.pipes[error_index][0])
raise ExtractorError("%s error: '%s' returned status code %s" %
(self.pipes[error_index][1], command,
- self.exit_codes[error_index]))
+ error_code))
def extract_archive(self):
self.pipe(self.extract_pipe)
@@ -340,6 +360,7 @@ class CompressionExtractor(BaseExtractor):
self.content_type = ONE_ENTRY_KNOWN
self.content_name = self.basename()
self.contents = None
+ self.file_count = 1
self.included_root = './'
try:
output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.')
@@ -352,6 +373,7 @@ class CompressionExtractor(BaseExtractor):
except EXTRACTION_ERRORS:
os.unlink(self.target)
raise
+
class TarExtractor(BaseExtractor):
file_type = 'tar file'
@@ -410,7 +432,7 @@ class DebExtractor(TarExtractor):
raise ExtractorError("data.tar file has unrecognized encoding")
self.pipe(['ar', 'p', self.filename, data_filename],
"extracting data.tar from .deb")
- self.pipe([self.decoders[encoding]], "decoding data.tar")
+ self.pipe(self.decoders[encoding], "decoding data.tar")
def basename(self):
pieces = os.path.basename(self.filename).split('_')
@@ -486,6 +508,39 @@ class ZipExtractor(NoPipeExtractor):
extract_command = ['unzip', '-q']
list_command = ['zipinfo', '-1']
+ def is_fatal_error(self, status):
+ return status > 1
+
+
+class LZHExtractor(ZipExtractor):
+ file_type = 'LZH file'
+ extract_command = ['lha', 'xq']
+ list_command = ['lha', 'l']
+
+ def border_line_file_index(self, line):
+ last_space_index = None
+ for index, char in enumerate(line):
+ if char == ' ':
+ last_space_index = index
+ elif char != '-':
+ return None
+ if last_space_index is None:
+ return None
+ return last_space_index + 1
+
+ def get_filenames(self):
+ filenames = NoPipeExtractor.get_filenames(self)
+ for line in filenames:
+ fn_index = self.border_line_file_index(line)
+ if fn_index is not None:
+ break
+ for line in filenames:
+ if self.border_line_file_index(line):
+ break
+ else:
+ yield line[fn_index:]
+ self.archive.close()
+
class SevenExtractor(NoPipeExtractor):
file_type = '7z file'
@@ -673,7 +728,9 @@ class EmptyHandler(object):
return contents == EMPTY
can_handle = staticmethod(can_handle)
- def __init__(self, extractor, options): pass
+ def __init__(self, extractor, options):
+ os.rmdir(extractor.target)
+
def handle(self): pass
@@ -799,7 +856,8 @@ class RecursionPolicy(BasePolicy):
def prep(self, current_filename, target, extractor):
archive_count = len(extractor.included_archives)
- if (self.permanent_policy is not None) or (archive_count == 0):
+ if ((self.permanent_policy is not None) or
+ ((archive_count * 10) <= extractor.file_count)):
self.current_policy = self.permanent_policy or RECURSE_NOT_NOW
return
question = self.wrap(
@@ -825,48 +883,56 @@ class RecursionPolicy(BasePolicy):
class ExtractorBuilder(object):
- extractor_map = {'tar': {'extractor': TarExtractor,
+ extractor_map = {'tar': {'extractors': (TarExtractor,),
'mimetypes': ('x-tar',),
'extensions': ('tar',),
'magic': ('POSIX tar archive',)},
- 'zip': {'extractor': ZipExtractor,
+ 'zip': {'extractors': (ZipExtractor, SevenExtractor),
'mimetypes': ('zip',),
'extensions': ('zip',),
'magic': ('(Zip|ZIP self-extracting) archive',)},
- 'rpm': {'extractor': RPMExtractor,
+ 'lzh': {'extractors': (LZHExtractor,),
+ 'mimetypes': ('x-lzh', 'x-lzh-compressed'),
+ 'extensions': ('lzh', 'lha'),
+ 'magic': ('LHa [\d\.\?]+ archive',)},
+ 'rpm': {'extractors': (RPMExtractor,),
'mimetypes': ('x-redhat-package-manager', 'x-rpm'),
'extensions': ('rpm',),
'magic': ('RPM',)},
- 'deb': {'extractor': DebExtractor,
- 'metadata': DebMetadataExtractor,
+ 'deb': {'extractors': (DebExtractor,),
+ 'metadata': (DebMetadataExtractor,),
'mimetypes': ('x-debian-package',),
'extensions': ('deb',),
'magic': ('Debian binary package',)},
- 'cpio': {'extractor': CpioExtractor,
+ 'cpio': {'extractors': (CpioExtractor,),
'mimetypes': ('x-cpio',),
'extensions': ('cpio',),
'magic': ('cpio archive',)},
- 'gem': {'extractor': GemExtractor,
- 'metadata': GemMetadataExtractor,
+ 'gem': {'extractors': (GemExtractor,),
+ 'metadata': (GemMetadataExtractor,),
'mimetypes': ('x-ruby-gem',),
'extensions': ('gem',)},
- '7z': {'extractor': SevenExtractor,
+ '7z': {'extractors': (SevenExtractor,),
'mimetypes': ('x-7z-compressed',),
'extensions': ('7z',),
'magic': ('7-zip archive',)},
- 'cab': {'extractor': CABExtractor,
+ 'cab': {'extractors': (CABExtractor,),
'mimetypes': ('x-cab',),
'extensions': ('cab',),
'magic': ('Microsoft Cabinet Archive',)},
- 'rar': {'extractor': RarExtractor,
+ 'rar': {'extractors': (RarExtractor,),
'mimetypes': ('rar',),
'extensions': ('rar',),
'magic': ('RAR archive',)},
- 'shield': {'extractor': ShieldExtractor,
+ 'shield': {'extractors': (ShieldExtractor,),
'mimetypes': ('x-cab',),
'extensions': ('cab', 'hdr'),
'magic': ('InstallShield CAB',)},
- 'compress': {'extractor': CompressionExtractor}
+ 'msi': {'extractors': (SevenExtractor,),
+ 'mimetypes': ('x-msi', 'x-ole-storage'),
+ 'extensions': ('msi',),
+ 'magic': ('Application: Windows Installer',)},
+ 'compress': {'extractors': (CompressionExtractor,)}
}
mimetype_map = {}
@@ -886,6 +952,7 @@ class ExtractorBuilder(object):
('tar', 'gzip', 'tar.gz', 'tgz'),
('tar', 'lzma', 'tar.lzma', 'tlz'),
('tar', 'xz', 'tar.xz'),
+ ('tar', 'lz', 'tar.lz'),
('tar', 'compress', 'tar.Z', 'taz'),
('compress', 'gzip', 'Z', 'gz'),
('compress', 'bzip2', 'bz2'),
@@ -898,6 +965,7 @@ class ExtractorBuilder(object):
for mapping in (('bzip2', 'bzip2 compressed'),
('gzip', 'gzip compressed'),
('lzma', 'LZMA compressed'),
+ ('lzip', 'lzip compressed'),
('xz', 'xz compressed')):
for pattern in mapping[1:]:
magic_encoding_map[re.compile(pattern)] = mapping[0]
@@ -907,12 +975,13 @@ class ExtractorBuilder(object):
self.options = options
def build_extractor(self, archive_type, encoding):
- extractors = self.extractor_map[archive_type]
- if self.options.metadata and extractors.has_key('metadata'):
- extractor = extractors['metadata']
+ type_info = self.extractor_map[archive_type]
+ if self.options.metadata and type_info.has_key('metadata'):
+ extractors = type_info['metadata']
else:
- extractor = extractors['extractor']
- return extractor(self.filename, encoding)
+ extractors = type_info['extractors']
+ for extractor in extractors:
+ yield extractor(self.filename, encoding)
def get_extractor(self):
tried_types = set()
@@ -932,7 +1001,8 @@ class ExtractorBuilder(object):
tried_types.add(ext_args)
logger.debug("trying %s extractor from %s" %
(ext_args, func_name))
- yield self.build_extractor(*ext_args)
+ for extractor in self.build_extractor(*ext_args):
+ yield extractor
def try_by_mimetype(cls, filename):
mimetype, encoding = mimetypes.guess_type(filename)
@@ -1228,6 +1298,21 @@ class ExtractorApplication(object):
self.show_stderr(logger.error, stderr)
return True
+ def download(self, filename):
+ url = filename.lower()
+ for protocol in 'http', 'https', 'ftp':
+ if url.startswith(protocol + '://'):
+ break
+ else:
+ return filename, None
+ # FIXME: This can fail if there's already a file in the directory
+ # that matches the basename of the URL.
+ status = subprocess.call(['wget', '-c', filename],
+ stdin=subprocess.PIPE)
+ if status != 0:
+ return None, "wget returned status code %s" % (status,)
+ return os.path.basename(urlparse.urlparse(filename)[2]), None
+
def run(self):
if self.options.show_list:
action = ListAction
@@ -1238,9 +1323,12 @@ class ExtractorApplication(object):
self.current_directory, self.filenames = self.archives.popitem()
os.chdir(self.current_directory)
for filename in self.filenames:
- builder = ExtractorBuilder(filename, self.options)
- error = (self.check_file(filename) or
- self.try_extractors(filename, builder.get_extractor()))
+ filename, error = self.download(filename)
+ if not error:
+ builder = ExtractorBuilder(filename, self.options)
+ error = (self.check_file(filename) or
+ self.try_extractors(filename,
+ builder.get_extractor()))
if error:
if error != True:
logger.error("%s: %s" % (filename, error))