# coding: utf-8
# /*##########################################################################
# Copyright (C) 2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""Convert silx supported data files into HDF5 files"""

import ast
import sys
import os
import argparse
from glob import glob
import logging
import numpy
import silx

__authors__ = ["P. Knobel"]
__license__ = "MIT"
__date__ = "12/09/2017"

_logger = logging.getLogger(__name__)
"""Module logger"""


def main(argv):
    """
    Main function to launch the converter as an application

    :param argv: Command line arguments
    :returns: exit status
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'input_files',
        nargs="+",
        help='Input files (EDF, SPEC)')
    parser.add_argument(
        '-o', '--output-uri',
        nargs="?",
        help='Output file (HDF5). If omitted, it will be the '
             'concatenated input file names, with a ".h5" suffix added. '
             'A URI can be provided to write the data into a specific '
             'group in the output file: /path/to/file::/path/to/group')
    parser.add_argument(
        '-m', '--mode',
        default="w-",
        help='Write mode: "r+" (read/write, file must exist), '
             '"w" (write, existing file is lost), '
             '"w-" (write, fail if file exists) or '
             '"a" (read/write if exists, create otherwise)')
    parser.add_argument(
        '--no-root-group',
        action="store_true",
        help='This option disables the default behavior of creating a '
             'root group (entry) for each file to be converted. When '
             'merging multiple input files, this can cause conflicts '
             'when datasets have the same name (see --overwrite-data).')
    parser.add_argument(
        '--overwrite-data',
        action="store_true",
        help='If the output path exists and an input dataset has the same '
             'name as an existing output dataset, overwrite the output '
             'dataset (in modes "r+" or "a").')
    parser.add_argument(
        '--min-size',
        type=int,
        default=500,
        help='Minimum number of elements required in a dataset to '
             'apply compression or chunking (default 500).')
    parser.add_argument(
        '--chunks',
        nargs="?",
        const="auto",
        help='Chunk shape. Provide an argument that evaluates to a Python '
             'tuple (e.g. "(1024, 768)"). If this option is provided without '
             'an argument, the h5py library will guess a chunk shape for '
             'you. Note that if you specify an explicit chunk shape, it '
             'will be applied identically to all datasets with a large '
             'enough size (see --min-size).')
    parser.add_argument(
        '--compression',
        nargs="?",
        const="gzip",
        help='Compression filter. By default, the datasets in the output '
             'file are not compressed. If this option is specified without '
             'an argument, GZIP compression is used. Additional compression '
             'filters may be available, depending on your HDF5 installation.')

    def check_gzip_compression_opts(value):
        ivalue = int(value)
        if ivalue < 0 or ivalue > 9:
            raise argparse.ArgumentTypeError(
                "--compression-opts must be an int from 0 to 9")
        return ivalue

    parser.add_argument(
        '--compression-opts',
        type=check_gzip_compression_opts,
        help='Compression options. For "gzip", this may be an integer from '
             '0 to 9, with a default of 4. This is only supported for GZIP.')
    parser.add_argument(
        '--shuffle',
        action="store_true",
        help='Enable the byte shuffle filter. This may improve the '
             'compression ratio for block-oriented compressors like '
             'GZIP or LZF.')
    parser.add_argument(
        '--fletcher32',
        action="store_true",
        help='Add a checksum to each chunk to detect data corruption.')
    parser.add_argument(
        '--debug',
        action="store_true",
        default=False,
        help='Set logging system to debug mode')

    options = parser.parse_args(argv[1:])

    # some shells (e.g. on Windows) don't expand wildcard characters (*, ?, [])
    old_input_list = list(options.input_files)
    options.input_files = []
    for fname in old_input_list:
        globbed_files = glob(fname)
        if not globbed_files:
            # no files found, keep the name as it is, to raise an error later
            options.input_files += [fname]
        else:
            options.input_files += globbed_files
    old_input_list = None

    if options.debug:
        logging.root.setLevel(logging.DEBUG)

    # Import most of the things here to be sure to use the right logging level
    try:
        # it should be loaded before h5py
        import hdf5plugin  # noqa
    except ImportError:
        _logger.debug("Backtrace", exc_info=True)
        hdf5plugin = None

    try:
        import h5py
        from silx.io.convert import write_to_h5
    except ImportError:
        _logger.debug("Backtrace", exc_info=True)
        h5py = None
        write_to_h5 = None

    if h5py is None:
        message = "Module 'h5py' is not installed but is mandatory." \
                  " You can install it using \"pip install h5py\"."
        _logger.error(message)
        return -1

    if hdf5plugin is None:
        message = "Module 'hdf5plugin' is not installed. It provides" \
                  " additional HDF5 compression filters. You can install it" \
                  " using \"pip install hdf5plugin\"."
        _logger.debug(message)

    # Test that the output path is writeable
    if options.output_uri is None:
        input_basenames = [os.path.basename(name)
                           for name in options.input_files]
        output_name = ''.join(input_basenames) + ".h5"
        _logger.info("No output file specified, using %s", output_name)
        hdf5_path = "/"
    else:
        if "::" in options.output_uri:
            output_name, hdf5_path = options.output_uri.split("::")
        else:
            output_name, hdf5_path = options.output_uri, "/"

    if os.path.isfile(output_name):
        if options.mode == "w-":
            _logger.error("Output file %s exists and mode is 'w-'"
                          " (write, file must not exist). Aborting.",
                          output_name)
            return -1
        elif not os.access(output_name, os.W_OK):
            _logger.error("Output file %s exists and is not writeable.",
                          output_name)
            return -1
        elif options.mode == "w":
            _logger.info("Output file %s exists and mode is 'w'. "
                         "Overwriting existing file.", output_name)
        elif options.mode in ["a", "r+"]:
            _logger.info("Appending data to existing file %s.", output_name)
    else:
        if options.mode == "r+":
            _logger.error("Output file %s does not exist and mode is 'r+'"
                          " (append, file must exist). Aborting.",
                          output_name)
            return -1
        else:
            _logger.info("Creating new output file %s.", output_name)
Aborting.", output_name) return -1 else: _logger.info("Creating new output file %s.", output_name) # Test that all input files exist and are readable bad_input = False for fname in options.input_files: if not os.access(fname, os.R_OK): _logger.error("Cannot read input file %s.", fname) bad_input = True if bad_input: _logger.error("Aborting.") return -1 # create_dataset special args create_dataset_args = {} if options.chunks is not None: if options.chunks.lower() in ["auto", "true"]: create_dataset_args["chunks"] = True else: try: chunks = ast.literal_eval(options.chunks) except (ValueError, SyntaxError): _logger.error("Invalid --chunks argument %s", options.chunks) return -1 if not isinstance(chunks, (tuple, list)): _logger.error("--chunks argument str does not evaluate to a tuple") return -1 else: nitems = numpy.prod(chunks) nbytes = nitems * 8 if nbytes > 10**6: _logger.warning("Requested chunk size might be larger than" " the default 1MB chunk cache, for float64" " data. This can dramatically affect I/O " "performances.") create_dataset_args["chunks"] = chunks if options.compression is not None: create_dataset_args["compression"] = options.compression if options.compression_opts is not None: create_dataset_args["compression_opts"] = options.compression_opts if options.shuffle: create_dataset_args["shuffle"] = True if options.fletcher32: create_dataset_args["fletcher32"] = True with h5py.File(output_name, mode=options.mode) as h5f: for input_name in options.input_files: hdf5_path_for_file = hdf5_path if not options.no_root_group: hdf5_path_for_file = hdf5_path.rstrip("/") + "/" + os.path.basename(input_name) write_to_h5(input_name, h5f, h5path=hdf5_path_for_file, overwrite_data=options.overwrite_data, create_dataset_args=create_dataset_args, min_size=options.min_size) # append the convert command to the creator attribute, for NeXus files creator = h5f[hdf5_path_for_file].attrs.get("creator", b"").decode() convert_command = " ".join(argv) if convert_command not in creator: h5f[hdf5_path_for_file].attrs["creator"] = \ numpy.string_(creator + "; convert command: %s" % " ".join(argv)) return 0