diff options
Diffstat (limited to 'silx/io/convert.py')
-rw-r--r-- | silx/io/convert.py | 81 |
1 files changed, 57 insertions, 24 deletions
diff --git a/silx/io/convert.py b/silx/io/convert.py index 41f1e36..a2639e6 100644 --- a/silx/io/convert.py +++ b/silx/io/convert.py @@ -1,6 +1,6 @@ # coding: utf-8 # /*########################################################################## -# Copyright (C) 2016-2017 European Synchrotron Radiation Facility +# Copyright (C) 2016-2018 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,39 +28,39 @@ supported formats. Read the documentation of :mod:`silx.io.spech5` and :mod:`silx.io.fabioh5` for information on the structure of the output HDF5 files. -Strings are written to the HDF5 datasets as fixed-length ASCII (NumPy *S* type). -This is done in order to produce files that have maximum compatibility with -other HDF5 libraries, as recommended in the -`h5py documentation <http://docs.h5py.org/en/latest/strings.html#how-to-store-text-strings>`_. +Text strings are written to the HDF5 datasets as variable-length utf-8. -If you read the files back with *h5py* in Python 3, you will recover strings -as bytes, which you should decode to transform them into python strings:: +.. warning:: - >>> import h5py - >>> f = h5py.File("myfile.h5") - >>> f["/1.1/instrument/specfile/scan_header"][0] - b'#S 94 ascan del -0.5 0.5 20 1' - >>> f["/1.1/instrument/specfile/scan_header"][0].decode() - '#S 94 ascan del -0.5 0.5 20 1' + The output format for text strings changed in silx version 0.7.0. + Prior to that, text was output as fixed-length ASCII. + + To be on the safe side, when reading back a HDF5 file written with an + older version of silx, you can test for the presence of a *decode* + attribute. To ensure that you always work with unicode text:: + + >>> import h5py + >>> h5f = h5py.File("my_scans.h5", "r") + >>> title = h5f["/68.1/title"] + >>> if hasattr(title, "decode"): + ... title = title.decode() -Arrays of strings, such as file and scan headers, are stored as fixed-length -strings. The length of all strings in an array is equal to the length of the -longest string. Shorter strings are right-padded with blank spaces. .. note:: This module has a dependency on the `h5py <http://www.h5py.org/>`_ library, which is not a mandatory dependency for `silx`. You might need to install it if you don't already have it. """ -import numpy import logging +import numpy import silx.io from silx.io import is_dataset, is_group, is_softlink +from silx.third_party import six __authors__ = ["P. Knobel"] __license__ = "MIT" -__date__ = "14/09/2017" +__date__ = "12/02/2018" _logger = logging.getLogger(__name__) @@ -92,7 +92,7 @@ def _create_link(h5f, link_name, target_name, target_name) del h5f[link_name] else: - _logger.warn(link_name + " already exist. Can't create link to " + + _logger.warn(link_name + " already exist. Cannot create link to " + target_name) return None @@ -104,6 +104,23 @@ def _create_link(h5f, link_name, target_name, raise ValueError("link_type must be 'hard' or 'soft'") +def _attr_utf8(attr_value): + """If attr_value is bytes, make sure we output utf-8 + + :param attr_value: String (possibly bytes if PY2) + :return: Attr ready to be written by h5py as utf8 + """ + if isinstance(attr_value, six.binary_type) or \ + isinstance(attr_value, six.text_type): + out_attr_value = numpy.array( + attr_value, + dtype=h5py.special_dtype(vlen=six.text_type)) + else: + out_attr_value = attr_value + + return out_attr_value + + class Hdf5Writer(object): """Converter class to write the content of a data file to a HDF5 file. """ @@ -168,7 +185,7 @@ class Hdf5Writer(object): for key in infile.attrs: if self.overwrite_data or key not in root_grp.attrs: root_grp.attrs.create(key, - numpy.string_(infile.attrs[key])) + _attr_utf8(infile.attrs[key])) # Handle links at the end, when their targets are created for link_name, target_name in self._links: @@ -208,10 +225,11 @@ class Hdf5Writer(object): # add HDF5 attributes for key in obj.attrs: if self.overwrite_data or key not in ds.attrs: - ds.attrs.create(key, numpy.string_(obj.attrs[key])) + ds.attrs.create(key, + _attr_utf8(obj.attrs[key])) if not self.overwrite_data and member_initially_exists: - _logger.warn("Ignoring existing dataset: " + h5_name) + _logger.warn("Not overwriting existing dataset: " + h5_name) elif is_group(obj): if h5_name not in self._h5f: @@ -223,7 +241,14 @@ class Hdf5Writer(object): # add HDF5 attributes for key in obj.attrs: if self.overwrite_data or key not in grp.attrs: - grp.attrs.create(key, numpy.string_(obj.attrs[key])) + grp.attrs.create(key, + _attr_utf8(obj.attrs[key])) + + +def _is_commonh5_group(grp): + """Return True if grp is a commonh5 group. + (h5py.Group objects are not commonh5 groups)""" + return is_group(grp) and not isinstance(grp, h5py.Group) def write_to_h5(infile, h5file, h5path='/', mode="a", @@ -232,7 +257,7 @@ def write_to_h5(infile, h5file, h5path='/', mode="a", """Write content of a h5py-like object into a HDF5 file. :param infile: Path of input file, or :class:`commonh5.File` object - or :class:`commonh5.Group` object + or :class:`commonh5.Group` object. :param h5file: Path of output HDF5 file or HDF5 file handle (`h5py.File` object) :param str h5path: Target path in HDF5 file in which scan groups are created. @@ -264,15 +289,23 @@ def write_to_h5(infile, h5file, h5path='/', mode="a", # both infile and h5file can be either file handle or a file name: 4 cases if not isinstance(h5file, h5py.File) and not is_group(infile): with silx.io.open(infile) as h5pylike: + if not _is_commonh5_group(h5pylike): + raise IOError("Cannot convert HDF5 file %s to HDF5" % infile) with h5py.File(h5file, mode) as h5f: writer.write(h5pylike, h5f) elif isinstance(h5file, h5py.File) and not is_group(infile): with silx.io.open(infile) as h5pylike: + if not _is_commonh5_group(h5pylike): + raise IOError("Cannot convert HDF5 file %s to HDF5" % infile) writer.write(h5pylike, h5file) elif is_group(infile) and not isinstance(h5file, h5py.File): + if not _is_commonh5_group(infile): + raise IOError("Cannot convert HDF5 file %s to HDF5" % infile.file.name) with h5py.File(h5file, mode) as h5f: writer.write(infile, h5f) else: + if not _is_commonh5_group(infile): + raise IOError("Cannot convert HDF5 file %s to HDF5" % infile.file.name) writer.write(infile, h5file) |