Diffstat (limited to 'silx/app/convert.py')
-rw-r--r-- | silx/app/convert.py | 283 |
1 file changed, 283 insertions, 0 deletions
diff --git a/silx/app/convert.py b/silx/app/convert.py
new file mode 100644
index 0000000..a092ec1
--- /dev/null
+++ b/silx/app/convert.py
@@ -0,0 +1,283 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""Convert silx supported data files into HDF5 files"""
+
+import ast
+import sys
+import os
+import argparse
+from glob import glob
+import logging
+import numpy
+import silx
+
+
+__authors__ = ["P. Knobel"]
+__license__ = "MIT"
+__date__ = "12/09/2017"
+
+
+_logger = logging.getLogger(__name__)
+"""Module logger"""
+
+
+def main(argv):
+    """
+    Main function to launch the converter as an application
+
+    :param argv: Command line arguments
+    :returns: exit status
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        'input_files',
+        nargs="+",
+        help='Input files (EDF, SPEC)')
+    parser.add_argument(
+        '-o', '--output-uri',
+        nargs="?",
+        help='Output file (HDF5). If omitted, it defaults to the '
+             'concatenated input file names, with a ".h5" suffix added. '
+             'A URI can be provided to write the data into a specific '
+             'group in the output file: /path/to/file::/path/to/group')
+    parser.add_argument(
+        '-m', '--mode',
+        default="w-",
+        help='Write mode: "r+" (read/write, file must exist), '
+             '"w" (write, existing file is lost), '
+             '"w-" (write, fail if file exists) or '
+             '"a" (read/write if exists, create otherwise)')
+    parser.add_argument(
+        '--no-root-group',
+        action="store_true",
+        help='This option disables the default behavior of creating a '
+             'root group (entry) for each file to be converted. When '
+             'merging multiple input files, this can cause conflicts '
+             'when datasets have the same name (see --overwrite-data).')
+    parser.add_argument(
+        '--overwrite-data',
+        action="store_true",
+        help='If the output path exists and an input dataset has the same '
+             'name as an existing output dataset, overwrite the output '
+             'dataset (in modes "r+" or "a").')
+    parser.add_argument(
+        '--min-size',
+        type=int,
+        default=500,
+        help='Minimum number of elements required to be in a dataset to '
+             'apply compression or chunking (default 500).')
+    parser.add_argument(
+        '--chunks',
+        nargs="?",
+        const="auto",
+        help='Chunk shape. Provide an argument that evaluates to a Python '
+             'tuple (e.g. "(1024, 768)"). If this option is provided without '
+             'specifying an argument, the h5py library will guess a chunk for '
+             'you. Note that if you specify an explicit chunking shape, it '
+             'will be applied identically to all datasets with a large enough '
+             'size (see --min-size).')
+    parser.add_argument(
+        '--compression',
+        nargs="?",
+        const="gzip",
+        help='Compression filter. By default, the datasets in the output '
+             'file are not compressed. If this option is specified without '
+             'argument, GZIP compression is used. Additional compression '
+             'filters may be available, depending on your HDF5 installation.')
+
+    def check_gzip_compression_opts(value):
+        ivalue = int(value)
+        if ivalue < 0 or ivalue > 9:
+            raise argparse.ArgumentTypeError(
+                "--compression-opts must be an int from 0 to 9")
+        return ivalue
+
+    parser.add_argument(
+        '--compression-opts',
+        type=check_gzip_compression_opts,
+        help='Compression options. For "gzip", this may be an integer from '
+             '0 to 9, with a default of 4. This is only supported for GZIP.')
+    parser.add_argument(
+        '--shuffle',
+        action="store_true",
+        help='Enables the byte shuffle filter, which may improve the '
+             'compression ratio for block-oriented compressors like '
+             'GZIP or LZF.')
+    parser.add_argument(
+        '--fletcher32',
+        action="store_true",
+        help='Adds a checksum to each chunk to detect data corruption.')
+    parser.add_argument(
+        '--debug',
+        action="store_true",
+        default=False,
+        help='Set logging system to debug mode')
+
+    options = parser.parse_args(argv[1:])
+
+    # some shells (e.g. on Windows) don't expand wildcard characters (*, ?, [])
+    old_input_list = list(options.input_files)
+    options.input_files = []
+    for fname in old_input_list:
+        globbed_files = glob(fname)
+        if not globbed_files:
+            # no files found, keep the name as it is, to raise an error later
+            options.input_files += [fname]
+        else:
+            options.input_files += globbed_files
+    old_input_list = None
+
+    if options.debug:
+        logging.root.setLevel(logging.DEBUG)
+
+    # Import most of the things here to be sure to use the right logging level
+    try:
+        # it should be loaded before h5py
+        import hdf5plugin  # noqa
+    except ImportError:
+        _logger.debug("Backtrace", exc_info=True)
+        hdf5plugin = None
+
+    try:
+        import h5py
+        from silx.io.convert import write_to_h5
+    except ImportError:
+        _logger.debug("Backtrace", exc_info=True)
+        h5py = None
+        write_to_h5 = None
+
+    if h5py is None:
+        message = "Module 'h5py' is not installed but is mandatory." \
+                  " You can install it using \"pip install h5py\"."
+        _logger.error(message)
+        return -1
+
+    if hdf5plugin is None:
+        message = "Module 'hdf5plugin' is not installed. It provides additional" \
+                  " HDF5 compression filters. You can install it using" \
+                  " \"pip install hdf5plugin\"."
+        _logger.debug(message)
+
+    # Test that the output path is writeable
+    if options.output_uri is None:
+        input_basenames = [os.path.basename(name) for name in options.input_files]
+        output_name = ''.join(input_basenames) + ".h5"
+        _logger.info("No output file specified, using %s", output_name)
+        hdf5_path = "/"
+    else:
+        if "::" in options.output_uri:
+            output_name, hdf5_path = options.output_uri.split("::")
+        else:
+            output_name, hdf5_path = options.output_uri, "/"
+
+    if os.path.isfile(output_name):
+        if options.mode == "w-":
+            _logger.error("Output file %s exists and mode is 'w-' "
+                          "(write, file must not exist). Aborting.",
+                          output_name)
+            return -1
+        elif not os.access(output_name, os.W_OK):
+            _logger.error("Output file %s exists and is not writeable.",
+                          output_name)
+            return -1
+        elif options.mode == "w":
+            _logger.info("Output file %s exists and mode is 'w'. "
+                         "Overwriting existing file.", output_name)
+        elif options.mode in ["a", "r+"]:
+            _logger.info("Appending data to existing file %s.",
+                         output_name)
+    else:
+        if options.mode == "r+":
+            _logger.error("Output file %s does not exist and mode is 'r+' "
+                          "(append, file must exist). Aborting.",
+                          output_name)
+            return -1
+        else:
+            _logger.info("Creating new output file %s.",
+                         output_name)
+
+    # Test that all input files exist and are readable
+    bad_input = False
+    for fname in options.input_files:
+        if not os.access(fname, os.R_OK):
+            _logger.error("Cannot read input file %s.",
+                          fname)
+            bad_input = True
+    if bad_input:
+        _logger.error("Aborting.")
+        return -1
+
+    # create_dataset special args
+    create_dataset_args = {}
+    if options.chunks is not None:
+        if options.chunks.lower() in ["auto", "true"]:
+            create_dataset_args["chunks"] = True
+        else:
+            try:
+                chunks = ast.literal_eval(options.chunks)
+            except (ValueError, SyntaxError):
+                _logger.error("Invalid --chunks argument %s", options.chunks)
+                return -1
+            if not isinstance(chunks, (tuple, list)):
+                _logger.error("--chunks argument does not evaluate to a tuple")
+                return -1
+            else:
+                nitems = numpy.prod(chunks)
+                nbytes = nitems * 8
+                if nbytes > 10**6:
+                    _logger.warning("Requested chunk size might be larger than "
+                                    "the default 1MB chunk cache, for float64 "
+                                    "data. This can dramatically affect I/O "
+                                    "performance.")
+                create_dataset_args["chunks"] = chunks
+
+    if options.compression is not None:
+        create_dataset_args["compression"] = options.compression
+
+    if options.compression_opts is not None:
+        create_dataset_args["compression_opts"] = options.compression_opts
+
+    if options.shuffle:
+        create_dataset_args["shuffle"] = True
+
+    if options.fletcher32:
+        create_dataset_args["fletcher32"] = True
+
+    with h5py.File(output_name, mode=options.mode) as h5f:
+        for input_name in options.input_files:
+            hdf5_path_for_file = hdf5_path
+            if not options.no_root_group:
+                hdf5_path_for_file = hdf5_path.rstrip("/") + "/" + os.path.basename(input_name)
+            write_to_h5(input_name, h5f,
+                        h5path=hdf5_path_for_file,
+                        overwrite_data=options.overwrite_data,
+                        create_dataset_args=create_dataset_args,
+                        min_size=options.min_size)
+
+            # append the convert command to the creator attribute, for NeXus files
+            creator = h5f[hdf5_path_for_file].attrs.get("creator", b"").decode()
+            convert_command = " ".join(argv)
+            if convert_command not in creator:
+                h5f[hdf5_path_for_file].attrs["creator"] = \
+                    numpy.string_(creator + "; convert command: %s" % convert_command)
+
+    return 0