Diffstat (limited to 'silx/app/convert.py')
-rw-r--r-- | silx/app/convert.py | 525
1 file changed, 0 insertions, 525 deletions
diff --git a/silx/app/convert.py b/silx/app/convert.py
deleted file mode 100644
index 7e601ce..0000000
--- a/silx/app/convert.py
+++ /dev/null
@@ -1,525 +0,0 @@
-# coding: utf-8
-# /*##########################################################################
-# Copyright (C) 2017-2018 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ############################################################################*/
-"""Convert silx supported data files into HDF5 files"""
-
-__authors__ = ["P. Knobel"]
-__license__ = "MIT"
-__date__ = "05/02/2019"
-
-import ast
-import os
-import argparse
-from glob import glob
-import logging
-import re
-import time
-import numpy
-import six
-
-import silx.io
-from silx.io.specfile import is_specfile
-from silx.io import fabioh5
-
-_logger = logging.getLogger(__name__)
-"""Module logger"""
-
-
-def c_format_string_to_re(pattern_string):
-    """Convert a C-style format string with integer patterns into an
-    equivalent regular expression.
-
-    :param pattern_string: C-style format string with integer patterns
-        (e.g. "%d", "%04d").
-        Not supported: fixed length padded with whitespaces (e.g. "%4d", "%-4d")
-    :return: Equivalent regular expression
-        (e.g. "([-+]?\d+)", "([\d+-]\d{3})")
-    """
-    # escape dots and backslashes
-    pattern_string = pattern_string.replace("\\", "\\\\")
-    pattern_string = pattern_string.replace(".", r"\.")
-
-    # %d
-    pattern_string = pattern_string.replace("%d", r"([-+]?\d+)")
-
-    # %0nd
-    for sub_pattern in re.findall(r"%0\d+d", pattern_string):
-        n = int(re.search(r"%0(\d+)d", sub_pattern).group(1))
-        if n == 1:
-            re_sub_pattern = r"([+-]?\d)"
-        else:
-            re_sub_pattern = r"([\d+-]\d{%d})" % (n - 1)
-        pattern_string = pattern_string.replace(sub_pattern, re_sub_pattern, 1)
-
-    return pattern_string
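
For illustration, a minimal sketch of what this helper returns for two typical patterns (hypothetical standalone usage, assuming a checkout in which silx/app/convert.py still exists):

    from silx.app.convert import c_format_string_to_re

    c_format_string_to_re("toto_%d.edf")    # 'toto_([-+]?\d+)\.edf'
    c_format_string_to_re("toto_%04d.edf")  # 'toto_([\d+-]\d{3})\.edf'
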
"\\d+", "\\d{4}") - """ - # escape dots and backslashes - pattern_string = pattern_string.replace("\\", "\\\\") - pattern_string = pattern_string.replace(".", r"\.") - - # %d - pattern_string = pattern_string.replace("%d", r"([-+]?\d+)") - - # %0nd - for sub_pattern in re.findall(r"%0\d+d", pattern_string): - n = int(re.search(r"%0(\d+)d", sub_pattern).group(1)) - if n == 1: - re_sub_pattern = r"([+-]?\d)" - else: - re_sub_pattern = r"([\d+-]\d{%d})" % (n - 1) - pattern_string = pattern_string.replace(sub_pattern, re_sub_pattern, 1) - - return pattern_string - - -def drop_indices_before_begin(filenames, regex, begin): - """ - - :param List[str] filenames: list of filenames - :param str regex: Regexp used to find indices in a filename - :param str begin: Comma separated list of begin indices - :return: List of filenames with only indices >= begin - """ - begin_indices = list(map(int, begin.split(","))) - output_filenames = [] - for fname in filenames: - m = re.match(regex, fname) - file_indices = list(map(int, m.groups())) - if len(file_indices) != len(begin_indices): - raise IOError("Number of indices found in filename " - "does not match number of parsed end indices.") - good_indices = True - for i, fidx in enumerate(file_indices): - if fidx < begin_indices[i]: - good_indices = False - if good_indices: - output_filenames.append(fname) - return output_filenames - - -def drop_indices_after_end(filenames, regex, end): - """ - - :param List[str] filenames: list of filenames - :param str regex: Regexp used to find indices in a filename - :param str end: Comma separated list of end indices - :return: List of filenames with only indices <= end - """ - end_indices = list(map(int, end.split(","))) - output_filenames = [] - for fname in filenames: - m = re.match(regex, fname) - file_indices = list(map(int, m.groups())) - if len(file_indices) != len(end_indices): - raise IOError("Number of indices found in filename " - "does not match number of parsed end indices.") - good_indices = True - for i, fidx in enumerate(file_indices): - if fidx > end_indices[i]: - good_indices = False - if good_indices: - output_filenames.append(fname) - return output_filenames - - -def are_files_missing_in_series(filenames, regex): - """Return True if any file is missing in a list of filenames - that are supposed to follow a pattern. - - :param List[str] filenames: list of filenames - :param str regex: Regexp used to find indices in a filename - :return: boolean - :raises AssertionError: if a filename does not match the regexp - """ - previous_indices = None - for fname in filenames: - m = re.match(regex, fname) - assert m is not None, \ - "regex %s does not match filename %s" % (fname, regex) - new_indices = list(map(int, m.groups())) - if previous_indices is not None: - for old_idx, new_idx in zip(previous_indices, new_indices): - if (new_idx - old_idx) > 1: - _logger.error("Index increment > 1 in file series: " - "previous idx %d, next idx %d", - old_idx, new_idx) - return True - previous_indices = new_indices - return False - - -def are_all_specfile(filenames): - """Return True if all files in a list are SPEC files. - :param List[str] filenames: list of filenames - """ - for fname in filenames: - if not is_specfile(fname): - return False - return True - - -def contains_specfile(filenames): - """Return True if any file in a list are SPEC files. 
-
-
-def contains_specfile(filenames):
-    """Return True if any file in a list is a SPEC file.
-
-    :param List[str] filenames: list of filenames
-    """
-    for fname in filenames:
-        if is_specfile(fname):
-            return True
-    return False
-
-
-def main(argv):
-    """
-    Main function to launch the converter as an application
-
-    :param argv: Command line arguments
-    :returns: exit status
-    """
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument(
-        'input_files',
-        nargs="*",
-        help='Input files (EDF, TIFF, SPEC...). When specifying multiple '
-             'files, you cannot specify both fabio images and SPEC files. '
-             'Multiple SPEC files will simply be concatenated, with one '
-             'entry per scan. Multiple image files will be merged into '
-             'a single entry with a stack of images.')
-    # input_files and --file-pattern are mutually exclusive
-    parser.add_argument(
-        '--file-pattern',
-        help='File name pattern for loading a series of indexed image files '
-             '(toto_%%04d.edf). This argument is incompatible with argument '
-             'input_files. If an output URI with a HDF5 path is provided, '
-             'only the content of the NXdetector group will be copied there. '
-             'If no HDF5 path, or just "/", is given, a complete NXdata '
-             'structure will be created.')
-    parser.add_argument(
-        '-o', '--output-uri',
-        default=time.strftime("%Y%m%d-%H%M%S") + '.h5',
-        help='Output file name (HDF5). A URI can be provided to write'
-             ' the data into a specific group in the output file: '
-             '/path/to/file::/path/to/group. '
-             'If not provided, the filename defaults to a timestamp:'
-             ' YYYYmmdd-HHMMSS.h5')
-    parser.add_argument(
-        '-m', '--mode',
-        default="w-",
-        help='Write mode: "r+" (read/write, file must exist), '
-             '"w" (write, existing file is lost), '
-             '"w-" (write, fail if file exists) or '
-             '"a" (read/write if exists, create otherwise)')
-    parser.add_argument(
-        '--begin',
-        help='First file index, or first file indices to be considered. '
-             'This argument only makes sense when used together with '
-             '--file-pattern. Provide as many start indices as there '
-             'are indices in the file pattern, separated by commas. '
-             'Examples: "--file-pattern toto_%%d.edf --begin 100", '
-             '"--file-pattern toto_%%d_%%04d_%%02d.edf --begin 100,2000,5".')
-    parser.add_argument(
-        '--end',
-        help='Last file index, or last file indices to be considered. '
-             'The same rules as with argument --begin apply. '
-             'Example: "--file-pattern toto_%%d_%%d.edf --end 199,1999"')
-    parser.add_argument(
-        '--add-root-group',
-        action="store_true",
-        help='This option causes each input file to be written to a '
-             'specific root group with the same name as the file. When '
-             'merging multiple input files, this can help prevent conflicts'
-             ' when datasets have the same name (see --overwrite-data). '
-             'This option is ignored when using --file-pattern.')
-    parser.add_argument(
-        '--overwrite-data',
-        action="store_true",
-        help='If the output path exists and an input dataset has the same'
-             ' name as an existing output dataset, overwrite the output '
-             'dataset (in modes "r+" or "a").')
-    parser.add_argument(
-        '--min-size',
-        type=int,
-        default=500,
-        help='Minimum number of elements required to be in a dataset to '
-             'apply compression or chunking (default 500).')
-    parser.add_argument(
-        '--chunks',
-        nargs="?",
-        const="auto",
-        help='Chunk shape. Provide an argument that evaluates to a Python '
-             'tuple (e.g. "(1024, 768)"). If this option is provided without '
-             'specifying an argument, the h5py library will guess a chunk for '
-             'you. Note that if you specify an explicit chunking shape, it '
-             'will be applied identically to all datasets with a large enough '
-             'size (see --min-size).')
-    parser.add_argument(
-        '--compression',
-        nargs="?",
-        const="gzip",
-        help='Compression filter. By default, the datasets in the output '
-             'file are not compressed. If this option is specified without '
-             'argument, the GZIP compression is used. Additional compression '
-             'filters may be available, depending on your HDF5 installation.')
-
-    def check_gzip_compression_opts(value):
-        ivalue = int(value)
-        if ivalue < 0 or ivalue > 9:
-            raise argparse.ArgumentTypeError(
-                "--compression-opts must be an int from 0 to 9")
-        return ivalue
-
-    parser.add_argument(
-        '--compression-opts',
-        type=check_gzip_compression_opts,
-        help='Compression options. For "gzip", this may be an integer from '
-             '0 to 9, with a default of 4. This is only supported for GZIP.')
-    parser.add_argument(
-        '--shuffle',
-        action="store_true",
-        help='Enables the byte shuffle filter. This may improve the compression '
-             'ratio for block-oriented compressors like GZIP or LZF.')
-    parser.add_argument(
-        '--fletcher32',
-        action="store_true",
-        help='Adds a checksum to each chunk to detect data corruption.')
-    parser.add_argument(
-        '--debug',
-        action="store_true",
-        default=False,
-        help='Set logging system to debug mode')
-
-    options = parser.parse_args(argv[1:])
-
-    if options.debug:
-        logging.root.setLevel(logging.DEBUG)
-
-    # Import after parsing --debug
-    try:
-        # it should be loaded before h5py
-        import hdf5plugin  # noqa
-    except ImportError:
-        _logger.debug("Backtrace", exc_info=True)
-        hdf5plugin = None
-
-    import h5py
-
-    try:
-        from silx.io.convert import write_to_h5
-    except ImportError:
-        _logger.debug("Backtrace", exc_info=True)
-        write_to_h5 = None
-
-    if hdf5plugin is None:
-        message = "Module 'hdf5plugin' is not installed. It provides additional"\
-                  + " HDF5 compression filters. You can install it using \"pip install hdf5plugin\"."
-        _logger.debug(message)
-
-    # Process input arguments (mutually exclusive arguments)
-    if bool(options.input_files) == bool(options.file_pattern is not None):
-        if not options.input_files:
-            message = "You must specify either input files (at least one), "
-            message += "or a file pattern."
-        else:
-            message = "You cannot specify input files and a file pattern"
-            message += " at the same time."
-        _logger.error(message)
-        return -1
-    elif options.input_files:
-        # some shells (e.g. Windows cmd) don't expand wildcard characters (*, ?, [])
-        old_input_list = list(options.input_files)
-        options.input_files = []
-        for fname in old_input_list:
-            globbed_files = glob(fname)
-            if not globbed_files:
-                # no files found, keep the name as it is, to raise an error later
-                options.input_files += [fname]
-            else:
-                # glob does not sort files, but the bash shell does
-                options.input_files += sorted(globbed_files)
-    else:
-        # File series
-        dirname = os.path.dirname(options.file_pattern)
-        file_pattern_re = c_format_string_to_re(options.file_pattern) + "$"
-        files_in_dir = glob(os.path.join(dirname, "*"))
-        _logger.debug("""
-            Processing file_pattern
-            dirname: %s
-            file_pattern_re: %s
-            files_in_dir: %s
-            """, dirname, file_pattern_re, files_in_dir)
-
-        options.input_files = sorted(filter(lambda name: re.match(file_pattern_re, name),
-                                            files_in_dir))
-        _logger.debug("options.input_files: %s", options.input_files)
-
-        if options.begin is not None:
-            options.input_files = drop_indices_before_begin(options.input_files,
-                                                            file_pattern_re,
-                                                            options.begin)
-            _logger.debug("options.input_files after applying --begin: %s",
-                          options.input_files)
-
-        if options.end is not None:
-            options.input_files = drop_indices_after_end(options.input_files,
-                                                         file_pattern_re,
-                                                         options.end)
-            _logger.debug("options.input_files after applying --end: %s",
-                          options.input_files)
-
-        if are_files_missing_in_series(options.input_files,
-                                       file_pattern_re):
-            _logger.error("File missing in the file series. Aborting.")
-            return -1
-
-        if not options.input_files:
-            _logger.error("No file matching --file-pattern found.")
-            return -1
-
-    # Test that the output path is writeable
-    if "::" in options.output_uri:
-        output_name, hdf5_path = options.output_uri.split("::")
-    else:
-        output_name, hdf5_path = options.output_uri, "/"
-
-    if os.path.isfile(output_name):
-        if options.mode == "w-":
-            _logger.error("Output file %s exists and mode is 'w-' (default)."
-                          " Aborting. To append data to an existing file, "
-                          "use 'a' or 'r+'.",
-                          output_name)
-            return -1
-        elif not os.access(output_name, os.W_OK):
-            _logger.error("Output file %s exists and is not writeable.",
-                          output_name)
-            return -1
-        elif options.mode == "w":
-            _logger.info("Output file %s exists and mode is 'w'. "
-                         "Overwriting existing file.", output_name)
-        elif options.mode in ["a", "r+"]:
-            _logger.info("Appending data to existing file %s.",
-                         output_name)
-    else:
-        if options.mode == "r+":
-            _logger.error("Output file %s does not exist and mode is 'r+'"
-                          " (append, file must exist). Aborting.",
Aborting.", - output_name) - return -1 - else: - _logger.info("Creating new output file %s.", - output_name) - - # Test that all input files exist and are readable - bad_input = False - for fname in options.input_files: - if not os.access(fname, os.R_OK): - _logger.error("Cannot read input file %s.", - fname) - bad_input = True - if bad_input: - _logger.error("Aborting.") - return -1 - - # create_dataset special args - create_dataset_args = {} - if options.chunks is not None: - if options.chunks.lower() in ["auto", "true"]: - create_dataset_args["chunks"] = True - else: - try: - chunks = ast.literal_eval(options.chunks) - except (ValueError, SyntaxError): - _logger.error("Invalid --chunks argument %s", options.chunks) - return -1 - if not isinstance(chunks, (tuple, list)): - _logger.error("--chunks argument str does not evaluate to a tuple") - return -1 - else: - nitems = numpy.prod(chunks) - nbytes = nitems * 8 - if nbytes > 10**6: - _logger.warning("Requested chunk size might be larger than" - " the default 1MB chunk cache, for float64" - " data. This can dramatically affect I/O " - "performances.") - create_dataset_args["chunks"] = chunks - - if options.compression is not None: - try: - compression = int(options.compression) - except ValueError: - compression = options.compression - create_dataset_args["compression"] = compression - - if options.compression_opts is not None: - create_dataset_args["compression_opts"] = options.compression_opts - - if options.shuffle: - create_dataset_args["shuffle"] = True - - if options.fletcher32: - create_dataset_args["fletcher32"] = True - - if (len(options.input_files) > 1 and - not contains_specfile(options.input_files) and - not options.add_root_group) or options.file_pattern is not None: - # File series -> stack of images - input_group = fabioh5.File(file_series=options.input_files) - if hdf5_path != "/": - # we want to append only data and headers to an existing file - input_group = input_group["/scan_0/instrument/detector_0"] - with h5py.File(output_name, mode=options.mode) as h5f: - write_to_h5(input_group, h5f, - h5path=hdf5_path, - overwrite_data=options.overwrite_data, - create_dataset_args=create_dataset_args, - min_size=options.min_size) - - elif len(options.input_files) == 1 or \ - are_all_specfile(options.input_files) or\ - options.add_root_group: - # single file, or spec files - h5paths_and_groups = [] - for input_name in options.input_files: - hdf5_path_for_file = hdf5_path - if options.add_root_group: - hdf5_path_for_file = hdf5_path.rstrip("/") + "/" + os.path.basename(input_name) - try: - h5paths_and_groups.append((hdf5_path_for_file, - silx.io.open(input_name))) - except IOError: - _logger.error("Cannot read file %s. If this is a file format " - "supported by the fabio library, you can try to" - " install fabio (`pip install fabio`)." - " Aborting conversion.", - input_name) - return -1 - - with h5py.File(output_name, mode=options.mode) as h5f: - for hdf5_path_for_file, input_group in h5paths_and_groups: - write_to_h5(input_group, h5f, - h5path=hdf5_path_for_file, - overwrite_data=options.overwrite_data, - create_dataset_args=create_dataset_args, - min_size=options.min_size) - - else: - # multiple file, SPEC and fabio images mixed - _logger.error("Multiple files with incompatible formats specified. 
" - "You can provide multiple SPEC files or multiple image " - "files, but not both.") - return -1 - - with h5py.File(output_name, mode="r+") as h5f: - # append "silx convert" to the creator attribute, for NeXus files - previous_creator = h5f.attrs.get("creator", u"") - creator = "silx convert (v%s)" % silx.version - # only if it not already there - if creator not in previous_creator: - if not previous_creator: - new_creator = creator - else: - new_creator = previous_creator + "; " + creator - h5f.attrs["creator"] = numpy.array( - new_creator, - dtype=h5py.special_dtype(vlen=six.text_type)) - - return 0 |