diff options
Diffstat (limited to 'silx/io/convert.py')
-rw-r--r-- | silx/io/convert.py | 343 |
1 file changed, 0 insertions, 343 deletions
# coding: utf-8
# /*##########################################################################
# Copyright (C) 2016-2019 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""This module provides classes and function to convert file formats supported
by *silx* into HDF5 file. Currently, SPEC file and fabio images are the
supported formats.

Read the documentation of :mod:`silx.io.spech5` and :mod:`silx.io.fabioh5` for
information on the structure of the output HDF5 files.

Text strings are written to the HDF5 datasets as variable-length utf-8.

.. warning::

    The output format for text strings changed in silx version 0.7.0.
    Prior to that, text was output as fixed-length ASCII.

    To be on the safe side, when reading back a HDF5 file written with an
    older version of silx, you can test for the presence of a *decode*
    attribute. To ensure that you always work with unicode text::

        >>> import h5py
        >>> h5f = h5py.File("my_scans.h5", "r")
        >>> title = h5f["/68.1/title"]
        >>> if hasattr(title, "decode"):
        ...     title = title.decode()


.. note:: This module has a dependency on the `h5py <http://www.h5py.org/>`_
    library, which is not a mandatory dependency for `silx`. You might need
    to install it if you don't already have it.
"""

__authors__ = ["P. Knobel"]
__license__ = "MIT"
__date__ = "17/07/2018"


import logging

import h5py
import numpy
import six

import silx.io
from silx.io import is_dataset, is_group, is_softlink
from silx.io import fabioh5


_logger = logging.getLogger(__name__)


def _create_link(h5f, link_name, target_name,
                 link_type="soft", overwrite_data=False):
    """Create a link in a HDF5 file.

    If a member with name ``link_name`` already exists, delete it first or
    ignore the link depending on the ``overwrite_data`` parameter.

    :param h5f: :class:`h5py.File` object
    :param link_name: Link path
    :param target_name: Handle for target group or dataset
    :param str link_type: "soft" or "hard"
    :param bool overwrite_data: If True, delete existing member (group,
        dataset or link) with the same name. Default is False.
    :raises ValueError: If ``link_type`` is neither "soft" nor "hard".
    """
    # Validate link_type BEFORE any destructive action: the previous
    # implementation deleted an existing member first and only then raised
    # ValueError, losing data when called with a bad link_type.
    if link_type not in ("soft", "hard"):
        raise ValueError("link_type must be 'hard' or 'soft'")

    if link_name not in h5f:
        # lazy %-args: message is only formatted if the level is enabled
        _logger.debug("Creating link %s -> %s", link_name, target_name)
    elif overwrite_data:
        _logger.warning("Overwriting %s with link to %s",
                        link_name, target_name)
        del h5f[link_name]
    else:
        _logger.warning("%s already exist. Cannot create link to %s",
                        link_name, target_name)
        return None

    if link_type == "hard":
        h5f[link_name] = h5f[target_name]
    else:
        # link_type == "soft" (validated above)
        h5f[link_name] = h5py.SoftLink(target_name)


def _attr_utf8(attr_value):
    """If attr_value is a text or bytes string, make sure it is written
    by h5py as variable-length utf-8.

    :param attr_value: Attribute value (possibly bytes if PY2)
    :return: Attr value ready to be written by h5py as utf-8
    """
    if isinstance(attr_value, (six.binary_type, six.text_type)):
        # variable-length unicode dtype; h5py encodes it as utf-8 on write
        out_attr_value = numpy.array(
            attr_value,
            dtype=h5py.special_dtype(vlen=six.text_type))
    else:
        # non-string attributes are written unchanged
        out_attr_value = attr_value

    return out_attr_value


class Hdf5Writer(object):
    """Converter class to write the content of a data file to a HDF5 file.
    """
    def __init__(self,
                 h5path='/',
                 overwrite_data=False,
                 link_type="soft",
                 create_dataset_args=None,
                 min_size=500):
        """
        :param h5path: Target path where the scan groups will be written
            in the output HDF5 file.
        :param bool overwrite_data:
            See documentation of :func:`write_to_h5`
        :param str link_type: ``"hard"`` or ``"soft"`` (default)
        :param dict create_dataset_args: Dictionary of args you want to pass to
            ``h5py.File.create_dataset``.
            See documentation of :func:`write_to_h5`
        :param int min_size:
            See documentation of :func:`write_to_h5`
        """
        self.h5path = h5path
        if not h5path.startswith("/"):
            # target path must be absolute
            self.h5path = "/" + h5path
        if not self.h5path.endswith("/"):
            # normalize to a trailing slash so member paths concatenate cleanly
            self.h5path += "/"

        # h5py.File object, assigned in :meth:`write`
        self._h5f = None

        if create_dataset_args is None:
            create_dataset_args = {}
        self.create_dataset_args = create_dataset_args

        self.min_size = min_size

        self.overwrite_data = overwrite_data  # boolean

        self.link_type = link_type  # 'soft' or 'hard'

        # List of (link_path, target_path) tuples, created at the end of
        # :meth:`write` once all targets exist
        self._links = []
- See documentation of :func:`write_to_h5` - :param int min_size: - See documentation of :func:`write_to_h5` - """ - self.h5path = h5path - if not h5path.startswith("/"): - # target path must be absolute - self.h5path = "/" + h5path - if not self.h5path.endswith("/"): - self.h5path += "/" - - self._h5f = None - """h5py.File object, assigned in :meth:`write`""" - - if create_dataset_args is None: - create_dataset_args = {} - self.create_dataset_args = create_dataset_args - - self.min_size = min_size - - self.overwrite_data = overwrite_data # boolean - - self.link_type = link_type - """'soft' or 'hard' """ - - self._links = [] - """List of *(link_path, target_path)* tuples.""" - - def write(self, infile, h5f): - """Do the conversion from :attr:`sfh5` (Spec file) to *h5f* (HDF5) - - All the parameters needed for the conversion have been initialized - in the constructor. - - :param infile: :class:`SpecH5` object - :param h5f: :class:`h5py.File` instance - """ - # Recurse through all groups and datasets to add them to the HDF5 - self._h5f = h5f - infile.visititems(self.append_member_to_h5, visit_links=True) - - # Handle the attributes of the root group - root_grp = h5f[self.h5path] - for key in infile.attrs: - if self.overwrite_data or key not in root_grp.attrs: - root_grp.attrs.create(key, - _attr_utf8(infile.attrs[key])) - - # Handle links at the end, when their targets are created - for link_name, target_name in self._links: - _create_link(self._h5f, link_name, target_name, - link_type=self.link_type, - overwrite_data=self.overwrite_data) - self._links = [] - - def append_member_to_h5(self, h5like_name, obj): - """Add one group or one dataset to :attr:`h5f`""" - h5_name = self.h5path + h5like_name.lstrip("/") - if is_softlink(obj): - # links to be created after all groups and datasets - h5_target = self.h5path + obj.path.lstrip("/") - self._links.append((h5_name, h5_target)) - - elif is_dataset(obj): - _logger.debug("Saving dataset: " + h5_name) - - 
member_initially_exists = h5_name in self._h5f - - if self.overwrite_data and member_initially_exists: - _logger.warning("Overwriting dataset: " + h5_name) - del self._h5f[h5_name] - - if self.overwrite_data or not member_initially_exists: - if isinstance(obj, fabioh5.FrameData) and len(obj.shape) > 2: - # special case of multiframe data - # write frame by frame to save memory usage low - ds = self._h5f.create_dataset(h5_name, - shape=obj.shape, - dtype=obj.dtype, - **self.create_dataset_args) - for i, frame in enumerate(obj): - ds[i] = frame - else: - # fancy arguments don't apply to small dataset - if obj.size < self.min_size: - ds = self._h5f.create_dataset(h5_name, data=obj.value) - else: - ds = self._h5f.create_dataset(h5_name, data=obj.value, - **self.create_dataset_args) - else: - ds = self._h5f[h5_name] - - # add HDF5 attributes - for key in obj.attrs: - if self.overwrite_data or key not in ds.attrs: - ds.attrs.create(key, - _attr_utf8(obj.attrs[key])) - - if not self.overwrite_data and member_initially_exists: - _logger.warning("Not overwriting existing dataset: " + h5_name) - - elif is_group(obj): - if h5_name not in self._h5f: - _logger.debug("Creating group: " + h5_name) - grp = self._h5f.create_group(h5_name) - else: - grp = self._h5f[h5_name] - - # add HDF5 attributes - for key in obj.attrs: - if self.overwrite_data or key not in grp.attrs: - grp.attrs.create(key, - _attr_utf8(obj.attrs[key])) - - -def _is_commonh5_group(grp): - """Return True if grp is a commonh5 group. - (h5py.Group objects are not commonh5 groups)""" - return is_group(grp) and not isinstance(grp, h5py.Group) - - -def write_to_h5(infile, h5file, h5path='/', mode="a", - overwrite_data=False, link_type="soft", - create_dataset_args=None, min_size=500): - """Write content of a h5py-like object into a HDF5 file. - - :param infile: Path of input file, or :class:`commonh5.File` object - or :class:`commonh5.Group` object. 
def _validate_and_write(writer, h5pylike, display_name, h5file, mode):
    """Check that *h5pylike* is a commonh5 group and write it to *h5file*.

    Shared tail of :func:`write_to_h5`, factored out so the
    validation and the output-handle handling are written once instead of
    being duplicated in four input/output combinations.

    :param writer: Configured :class:`Hdf5Writer`
    :param h5pylike: Open commonh5 group or file object (the input)
    :param display_name: Name to show in the error message (input path or
        input file name)
    :param h5file: Output HDF5 file path or ``h5py.File`` handle
    :param str mode: File mode, used only when ``h5file`` is a path
    :raises IOError: If the input is already a plain HDF5 object
        (nothing to convert)
    """
    if not _is_commonh5_group(h5pylike):
        raise IOError("Cannot convert HDF5 file %s to HDF5" % display_name)
    if isinstance(h5file, h5py.File):
        # caller owns the output handle: do not close it
        writer.write(h5pylike, h5file)
    else:
        with h5py.File(h5file, mode) as h5f:
            writer.write(h5pylike, h5f)


def write_to_h5(infile, h5file, h5path='/', mode="a",
                overwrite_data=False, link_type="soft",
                create_dataset_args=None, min_size=500):
    """Write content of a h5py-like object into a HDF5 file.

    :param infile: Path of input file, or :class:`commonh5.File` object
        or :class:`commonh5.Group` object.
    :param h5file: Path of output HDF5 file or HDF5 file handle
        (`h5py.File` object)
    :param str h5path: Target path in HDF5 file in which scan groups are created.
        Default is root (``"/"``)
    :param str mode: Can be ``"r+"`` (read/write, file must exist),
        ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail
        if exists) or ``"a"`` (read/write if exists, create otherwise).
        This parameter is ignored if ``h5file`` is a file handle.
    :param bool overwrite_data: If ``True``, existing groups and datasets can be
        overwritten, if ``False`` they are skipped. This parameter is only
        relevant if ``file_mode`` is ``"r+"`` or ``"a"``.
    :param str link_type: *"soft"* (default) or *"hard"*
    :param dict create_dataset_args: Dictionary of args you want to pass to
        ``h5py.File.create_dataset``. This allows you to specify filters and
        compression parameters. Don't specify ``name`` and ``data``.
        These arguments are only applied to datasets larger than 1MB.
    :param int min_size: Minimum number of elements in a dataset to apply
        chunking and compression. Default is 500.

    The structure of the spec data in an HDF5 file is described in the
    documentation of :mod:`silx.io.spech5`.
    """
    writer = Hdf5Writer(h5path=h5path,
                        overwrite_data=overwrite_data,
                        link_type=link_type,
                        create_dataset_args=create_dataset_args,
                        min_size=min_size)

    # infile can be either an already-open commonh5 group or a file name;
    # h5file handling (handle vs path) is dealt with in _validate_and_write
    if is_group(infile):
        _validate_and_write(writer, infile, infile.file.name, h5file, mode)
    else:
        with silx.io.open(infile) as h5pylike:
            _validate_and_write(writer, h5pylike, infile, h5file, mode)


def convert(infile, h5file, mode="w-", create_dataset_args=None):
    """Convert a supported file into an HDF5 file, write scans into the
    root group (``/``).

    This is a convenience shortcut to call::

        write_to_h5(h5like, h5file, h5path='/',
                    mode="w-", link_type="soft")

    :param infile: Path of input file or :class:`commonh5.File` object
        or :class:`commonh5.Group` object
    :param h5file: Path of output HDF5 file, or h5py.File object
    :param mode: Can be ``"w"`` (write, existing file is
        lost), ``"w-"`` (write, fail if exists). This is ignored
        if ``h5file`` is a file handle.
    :param create_dataset_args: Dictionary of args you want to pass to
        ``h5py.File.create_dataset``. This allows you to specify filters and
        compression parameters. Don't specify ``name`` and ``data``.
    :raises IOError: If ``mode`` is neither ``"w"`` nor ``"w-"``.
    """
    if mode not in ("w", "w-"):
        raise IOError("File mode must be 'w' or 'w-'. Use write_to_h5" +
                      " to append data to an existing HDF5 file.")
    write_to_h5(infile, h5file, h5path='/', mode=mode,
                create_dataset_args=create_dataset_args)