# coding: utf-8
# /*##########################################################################
# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
#
# Provenance: this module was recovered from a cgit-rendered patch
# (cgit v1.2.3) that created ``silx/io/convert.py`` (index 0000000..41f1e36):
#   commit  bfa4dba15485b4192f8bbe13345e9658c97ecf76
#   From:   Picca Frédéric-Emmanuel
#   Date:   Sat, 7 Oct 2017 07:59:01 +0200
#   Subject: New upstream version 0.6.0+dfsg
"""This module provides classes and functions to convert file formats supported
by *silx* into HDF5 files. Currently, SPEC file and fabio images are the
supported formats.

Read the documentation of :mod:`silx.io.spech5` and :mod:`silx.io.fabioh5` for
information on the structure of the output HDF5 files.

Strings are written to the HDF5 datasets as fixed-length ASCII (NumPy *S* type).
This is done in order to produce files that have maximum compatibility with
other HDF5 libraries, as recommended in the
`h5py documentation <https://docs.h5py.org/en/latest/strings.html>`_.

If you read the files back with *h5py* in Python 3, you will recover strings
as bytes, which you should decode to transform them into python strings::

    >>> import h5py
    >>> f = h5py.File("myfile.h5")
    >>> f["/1.1/instrument/specfile/scan_header"][0]
    b'#S 94 ascan del -0.5 0.5 20 1'
    >>> f["/1.1/instrument/specfile/scan_header"][0].decode()
    '#S 94 ascan del -0.5 0.5 20 1'

Arrays of strings, such as file and scan headers, are stored as fixed-length
strings. The length of all strings in an array is equal to the length of the
longest string. Shorter strings are right-padded with blank spaces.

.. note:: This module has a dependency on the `h5py <https://www.h5py.org/>`_
    library, which is not a mandatory dependency for `silx`. You might need
    to install it if you don't already have it.
"""

import logging

import numpy

import silx.io
from silx.io import is_dataset, is_group, is_softlink

__authors__ = ["P. Knobel"]
__license__ = "MIT"
__date__ = "14/09/2017"

_logger = logging.getLogger(__name__)

try:
    import h5py
except ImportError:
    _logger.error("Module %s requires h5py", __name__)
    # Bare ``raise`` re-raises the active exception with its traceback intact.
    raise


def _create_link(h5f, link_name, target_name,
                 link_type="soft", overwrite_data=False):
    """Create a link in a HDF5 file

    If a member with name ``link_name`` already exists, it is deleted first
    when ``overwrite_data`` is True; otherwise a warning is logged and no
    link is created.

    :param h5f: :class:`h5py.File` object
    :param str link_name: Path of the link to create
    :param str target_name: Path of the target group or dataset
    :param str link_type: "soft" or "hard"
    :param bool overwrite_data: If True, delete existing member (group,
        dataset or link) with the same name. Default is False.
    :raises ValueError: If ``link_type`` is neither "soft" nor "hard".
    """
    # Validate before touching the file: with overwrite_data=True an existing
    # member would otherwise be deleted before the bad link type is noticed.
    if link_type not in ("hard", "soft"):
        raise ValueError("link_type must be 'hard' or 'soft'")

    if link_name not in h5f:
        _logger.debug("Creating link %s -> %s", link_name, target_name)
    elif overwrite_data:
        _logger.warning("Overwriting %s with link to %s",
                        link_name, target_name)
        del h5f[link_name]
    else:
        _logger.warning("%s already exist. Can't create link to %s",
                        link_name, target_name)
        return None

    if link_type == "hard":
        # Assigning an existing object creates a hard link to it
        h5f[link_name] = h5f[target_name]
    else:
        h5f[link_name] = h5py.SoftLink(target_name)


class Hdf5Writer(object):
    """Converter class to write the content of a data file to a HDF5 file.
    """
    def __init__(self,
                 h5path='/',
                 overwrite_data=False,
                 link_type="soft",
                 create_dataset_args=None,
                 min_size=500):
        """

        :param h5path: Target path where the scan groups will be written
            in the output HDF5 file. It is normalised to start and end
            with ``"/"``.
        :param bool overwrite_data:
            See documentation of :func:`write_to_h5`
        :param str link_type: ``"hard"`` or ``"soft"`` (default)
        :param dict create_dataset_args: Dictionary of args you want to pass to
            ``h5py.File.create_dataset``.
            See documentation of :func:`write_to_h5`
        :param int min_size:
            See documentation of :func:`write_to_h5`
        """
        self.h5path = h5path
        if not h5path.startswith("/"):
            # target path must be absolute
            self.h5path = "/" + h5path
        if not self.h5path.endswith("/"):
            self.h5path += "/"

        # h5py.File object, assigned in :meth:`write`
        self._h5f = None

        if create_dataset_args is None:
            create_dataset_args = {}
        self.create_dataset_args = create_dataset_args

        self.min_size = min_size

        self.overwrite_data = overwrite_data  # boolean

        # 'soft' or 'hard'
        self.link_type = link_type

        # List of *(link_path, target_path)* tuples, filled while visiting
        # the input and consumed at the end of :meth:`write`
        self._links = []

    def _copy_attrs(self, source_attrs, target):
        """Copy attributes from *source_attrs* to the HDF5 object *target*.

        Values are written as NumPy byte strings. An attribute already
        present on *target* is replaced only if :attr:`overwrite_data`
        is True.
        """
        for key in source_attrs:
            if self.overwrite_data or key not in target.attrs:
                # numpy.bytes_ is the same type as the former numpy.string_
                # alias, which was removed in NumPy 2.0
                target.attrs.create(key, numpy.bytes_(source_attrs[key]))

    def write(self, infile, h5f):
        """Do the conversion from *infile* (h5py-like object) to *h5f* (HDF5)

        All the parameters needed for the conversion have been initialized
        in the constructor.

        :param infile: h5py-like file or group object (e.g. :class:`SpecH5`)
            providing ``visititems`` and ``attrs``
        :param h5f: :class:`h5py.File` instance
        """
        # Recurse through all groups and datasets to add them to the HDF5
        self._h5f = h5f
        infile.visititems(self.append_member_to_h5, visit_links=True)

        # Handle the attributes of the root group
        self._copy_attrs(infile.attrs, h5f[self.h5path])

        # Handle links at the end, when their targets are created
        for link_name, target_name in self._links:
            _create_link(self._h5f, link_name, target_name,
                         link_type=self.link_type,
                         overwrite_data=self.overwrite_data)
        self._links = []

    def append_member_to_h5(self, h5like_name, obj):
        """Add one group or one dataset to the output file.

        Soft links are not created here: they are recorded and created
        at the end of :meth:`write`.

        :param str h5like_name: Path of the member in the input object
        :param obj: h5py-like soft link, dataset or group
        """
        h5_name = self.h5path + h5like_name.lstrip("/")

        if is_softlink(obj):
            # links to be created after all groups and datasets
            h5_target = self.h5path + obj.path.lstrip("/")
            self._links.append((h5_name, h5_target))

        elif is_dataset(obj):
            _logger.debug("Saving dataset: %s", h5_name)

            member_initially_exists = h5_name in self._h5f

            if self.overwrite_data and member_initially_exists:
                _logger.warning("Overwriting dataset: %s", h5_name)
                del self._h5f[h5_name]

            if self.overwrite_data or not member_initially_exists:
                # fancy arguments don't apply to small dataset
                if obj.size < self.min_size:
                    extra_args = {}
                else:
                    extra_args = self.create_dataset_args
                # ``obj[()]`` reads the whole dataset; the ``.value``
                # property was removed in h5py 3.0
                ds = self._h5f.create_dataset(h5_name, data=obj[()],
                                              **extra_args)
            else:
                ds = self._h5f[h5_name]

            # add HDF5 attributes
            self._copy_attrs(obj.attrs, ds)

            if not self.overwrite_data and member_initially_exists:
                _logger.warning("Ignoring existing dataset: %s", h5_name)

        elif is_group(obj):
            if h5_name not in self._h5f:
                _logger.debug("Creating group: %s", h5_name)
                grp = self._h5f.create_group(h5_name)
            else:
                grp = self._h5f[h5_name]

            # add HDF5 attributes
            self._copy_attrs(obj.attrs, grp)


def _write_to_output(writer, h5like, h5file, mode):
    """Run *writer* on *h5like*, opening *h5file* if it is not a file handle.

    :param writer: :class:`Hdf5Writer` instance
    :param h5like: h5py-like object to be written
    :param h5file: Path of output HDF5 file or :class:`h5py.File` object
    :param str mode: File mode, only used when *h5file* is a path
    """
    if isinstance(h5file, h5py.File):
        writer.write(h5like, h5file)
    else:
        with h5py.File(h5file, mode) as h5f:
            writer.write(h5like, h5f)


def write_to_h5(infile, h5file, h5path='/', mode="a",
                overwrite_data=False, link_type="soft",
                create_dataset_args=None, min_size=500):
    """Write content of a h5py-like object into a HDF5 file.

    :param infile: Path of input file, or :class:`commonh5.File` object
        or :class:`commonh5.Group` object
    :param h5file: Path of output HDF5 file or HDF5 file handle
        (`h5py.File` object)
    :param str h5path: Target path in HDF5 file in which scan groups are created.
        Default is root (``"/"``)
    :param str mode: Can be ``"r+"`` (read/write, file must exist),
        ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail
        if exists) or ``"a"`` (read/write if exists, create otherwise).
        This parameter is ignored if ``h5file`` is a file handle.
    :param bool overwrite_data: If ``True``, existing groups and datasets can be
        overwritten, if ``False`` they are skipped. This parameter is only
        relevant if ``mode`` is ``"r+"`` or ``"a"``.
    :param str link_type: *"soft"* (default) or *"hard"*
    :param dict create_dataset_args: Dictionary of args you want to pass to
        ``h5py.File.create_dataset``. This allows you to specify filters and
        compression parameters. Don't specify ``name`` and ``data``.
        These arguments are only applied to datasets with at least
        ``min_size`` elements.
    :param int min_size: Minimum number of elements in a dataset to apply
        chunking and compression. Default is 500.

    The structure of the spec data in an HDF5 file is described in the
    documentation of :mod:`silx.io.spech5`.
    """
    writer = Hdf5Writer(h5path=h5path,
                        overwrite_data=overwrite_data,
                        link_type=link_type,
                        create_dataset_args=create_dataset_args,
                        min_size=min_size)

    # both infile and h5file can be either file handle or a file name;
    # the input is opened first, the output is handled by the helper
    if is_group(infile):
        _write_to_output(writer, infile, h5file, mode)
    else:
        with silx.io.open(infile) as h5pylike:
            _write_to_output(writer, h5pylike, h5file, mode)


def convert(infile, h5file, mode="w-", create_dataset_args=None):
    """Convert a supported file into an HDF5 file, write scans into the
    root group (``/``).

    This is a convenience shortcut to call::

        write_to_h5(h5like, h5file, h5path='/',
                    mode="w-", link_type="soft")

    :param infile: Path of input file or :class:`commonh5.File` object
        or :class:`commonh5.Group` object
    :param h5file: Path of output HDF5 file, or h5py.File object
    :param mode: Can be ``"w"`` (write, existing file is
        lost), ``"w-"`` (write, fail if exists). This is ignored
        if ``h5file`` is a file handle.
    :param create_dataset_args: Dictionary of args you want to pass to
        ``h5py.File.create_dataset``. This allows you to specify filters and
        compression parameters. Don't specify ``name`` and ``data``.
    :raises IOError: If ``mode`` is neither ``"w"`` nor ``"w-"``.
    """
    if mode not in ["w", "w-"]:
        raise IOError("File mode must be 'w' or 'w-'. Use write_to_h5" +
                      " to append data to an existing HDF5 file.")
    write_to_h5(infile, h5file, h5path='/', mode=mode,
                create_dataset_args=create_dataset_args)