summary refs log tree commit diff
path: root/silx/io/convert.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/io/convert.py')
-rw-r--r-- silx/io/convert.py 81
1 files changed, 57 insertions, 24 deletions
diff --git a/silx/io/convert.py b/silx/io/convert.py
index 41f1e36..a2639e6 100644
--- a/silx/io/convert.py
+++ b/silx/io/convert.py
@@ -1,6 +1,6 @@
# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2018 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,39 +28,39 @@ supported formats.
Read the documentation of :mod:`silx.io.spech5` and :mod:`silx.io.fabioh5` for
information on the structure of the output HDF5 files.
-Strings are written to the HDF5 datasets as fixed-length ASCII (NumPy *S* type).
-This is done in order to produce files that have maximum compatibility with
-other HDF5 libraries, as recommended in the
-`h5py documentation <http://docs.h5py.org/en/latest/strings.html#how-to-store-text-strings>`_.
+Text strings are written to the HDF5 datasets as variable-length utf-8.
-If you read the files back with *h5py* in Python 3, you will recover strings
-as bytes, which you should decode to transform them into python strings::
+.. warning::
- >>> import h5py
- >>> f = h5py.File("myfile.h5")
- >>> f["/1.1/instrument/specfile/scan_header"][0]
- b'#S 94 ascan del -0.5 0.5 20 1'
- >>> f["/1.1/instrument/specfile/scan_header"][0].decode()
- '#S 94 ascan del -0.5 0.5 20 1'
+ The output format for text strings changed in silx version 0.7.0.
+ Prior to that, text was output as fixed-length ASCII.
+
+ To be on the safe side, when reading back an HDF5 file written with an
+ older version of silx, you can test for the presence of a *decode*
+ attribute to ensure that you always work with unicode text::
+
+ >>> import h5py
+ >>> h5f = h5py.File("my_scans.h5", "r")
+ >>> title = h5f["/68.1/title"]
+ >>> if hasattr(title, "decode"):
+ ... title = title.decode()
-Arrays of strings, such as file and scan headers, are stored as fixed-length
-strings. The length of all strings in an array is equal to the length of the
-longest string. Shorter strings are right-padded with blank spaces.
.. note:: This module has a dependency on the `h5py <http://www.h5py.org/>`_
library, which is not a mandatory dependency for `silx`. You might need
to install it if you don't already have it.
"""
-import numpy
import logging
+import numpy
import silx.io
from silx.io import is_dataset, is_group, is_softlink
+from silx.third_party import six
__authors__ = ["P. Knobel"]
__license__ = "MIT"
-__date__ = "14/09/2017"
+__date__ = "12/02/2018"
_logger = logging.getLogger(__name__)
@@ -92,7 +92,7 @@ def _create_link(h5f, link_name, target_name,
target_name)
del h5f[link_name]
else:
- _logger.warn(link_name + " already exist. Can't create link to " +
+ _logger.warn(link_name + " already exist. Cannot create link to " +
target_name)
return None
@@ -104,6 +104,23 @@ def _create_link(h5f, link_name, target_name,
raise ValueError("link_type must be 'hard' or 'soft'")
+def _attr_utf8(attr_value):
+ """If attr_value is a string (bytes or text), make sure we output utf-8
+
+ :param attr_value: String (possibly bytes if PY2)
+ :return: Attr ready to be written by h5py as utf8
+ """
+ if isinstance(attr_value, six.binary_type) or \
+ isinstance(attr_value, six.text_type):
+ out_attr_value = numpy.array(
+ attr_value,
+ dtype=h5py.special_dtype(vlen=six.text_type))
+ else:
+ out_attr_value = attr_value
+
+ return out_attr_value
+
+
class Hdf5Writer(object):
"""Converter class to write the content of a data file to a HDF5 file.
"""
@@ -168,7 +185,7 @@ class Hdf5Writer(object):
for key in infile.attrs:
if self.overwrite_data or key not in root_grp.attrs:
root_grp.attrs.create(key,
- numpy.string_(infile.attrs[key]))
+ _attr_utf8(infile.attrs[key]))
# Handle links at the end, when their targets are created
for link_name, target_name in self._links:
@@ -208,10 +225,11 @@ class Hdf5Writer(object):
# add HDF5 attributes
for key in obj.attrs:
if self.overwrite_data or key not in ds.attrs:
- ds.attrs.create(key, numpy.string_(obj.attrs[key]))
+ ds.attrs.create(key,
+ _attr_utf8(obj.attrs[key]))
if not self.overwrite_data and member_initially_exists:
- _logger.warn("Ignoring existing dataset: " + h5_name)
+ _logger.warn("Not overwriting existing dataset: " + h5_name)
elif is_group(obj):
if h5_name not in self._h5f:
@@ -223,7 +241,14 @@ class Hdf5Writer(object):
# add HDF5 attributes
for key in obj.attrs:
if self.overwrite_data or key not in grp.attrs:
- grp.attrs.create(key, numpy.string_(obj.attrs[key]))
+ grp.attrs.create(key,
+ _attr_utf8(obj.attrs[key]))
+
+
+def _is_commonh5_group(grp):
+ """Return True if grp is a commonh5 group.
+ (h5py.Group objects are not commonh5 groups)"""
+ return is_group(grp) and not isinstance(grp, h5py.Group)
def write_to_h5(infile, h5file, h5path='/', mode="a",
@@ -232,7 +257,7 @@ def write_to_h5(infile, h5file, h5path='/', mode="a",
"""Write content of a h5py-like object into a HDF5 file.
:param infile: Path of input file, or :class:`commonh5.File` object
- or :class:`commonh5.Group` object
+ or :class:`commonh5.Group` object.
:param h5file: Path of output HDF5 file or HDF5 file handle
(`h5py.File` object)
:param str h5path: Target path in HDF5 file in which scan groups are created.
@@ -264,15 +289,23 @@ def write_to_h5(infile, h5file, h5path='/', mode="a",
# both infile and h5file can be either file handle or a file name: 4 cases
if not isinstance(h5file, h5py.File) and not is_group(infile):
with silx.io.open(infile) as h5pylike:
+ if not _is_commonh5_group(h5pylike):
+ raise IOError("Cannot convert HDF5 file %s to HDF5" % infile)
with h5py.File(h5file, mode) as h5f:
writer.write(h5pylike, h5f)
elif isinstance(h5file, h5py.File) and not is_group(infile):
with silx.io.open(infile) as h5pylike:
+ if not _is_commonh5_group(h5pylike):
+ raise IOError("Cannot convert HDF5 file %s to HDF5" % infile)
writer.write(h5pylike, h5file)
elif is_group(infile) and not isinstance(h5file, h5py.File):
+ if not _is_commonh5_group(infile):
+ raise IOError("Cannot convert HDF5 file %s to HDF5" % infile.file.name)
with h5py.File(h5file, mode) as h5f:
writer.write(infile, h5f)
else:
+ if not _is_commonh5_group(infile):
+ raise IOError("Cannot convert HDF5 file %s to HDF5" % infile.file.name)
writer.write(infile, h5file)