Diffstat (limited to 'silx/io')
-rw-r--r--  silx/io/commonh5.py                        22
-rw-r--r--  silx/io/dictdump.py                       421
-rwxr-xr-x  silx/io/fabioh5.py                         10
-rw-r--r--  silx/io/nxdata/parse.py                     4
-rw-r--r--  silx/io/setup.py                            2
-rw-r--r--  silx/io/specfile/src/locale_management.c    5
-rw-r--r--  silx/io/test/test_dictdump.py             257
-rw-r--r--  silx/io/test/test_spectoh5.py               3
-rw-r--r--  silx/io/test/test_url.py                   10
-rw-r--r--  silx/io/test/test_utils.py                244
-rw-r--r--  silx/io/url.py                             21
-rw-r--r--  silx/io/utils.py                          331
12 files changed, 1071 insertions, 259 deletions
diff --git a/silx/io/commonh5.py b/silx/io/commonh5.py
index b624816..57232d8 100644
--- a/silx/io/commonh5.py
+++ b/silx/io/commonh5.py
@@ -1,6 +1,6 @@
# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2019 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2020 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -376,6 +376,24 @@ class Dataset(Node):
There is no chunks."""
return None
+ @property
+ def is_virtual(self):
+ """Checks virtual data as provided by `h5py.Dataset`"""
+ return False
+
+ def virtual_sources(self):
+ """Returns virtual dataset sources as provided by `h5py.Dataset`.
+
+ :rtype: list"""
+ raise RuntimeError("Not a virtual dataset")
+
+ @property
+ def external(self):
+ """Returns external sources as provided by `h5py.Dataset`.
+
+ :rtype: list or None"""
+ return None
+
def __array__(self, dtype=None):
# Special case for (0,)*-shape datasets
if numpy.product(self.shape) == 0:
@@ -958,7 +976,7 @@ class Group(Node):
raise TypeError("Path are not supported")
if data is None:
if dtype is None:
- dtype = numpy.float
+ dtype = numpy.float64
data = numpy.empty(shape=shape, dtype=dtype)
elif dtype is not None:
data = data.astype(dtype)
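These additions mirror the `h5py.Dataset` virtual/external dataset API on commonh5 objects, so code written to probe h5py datasets also works on commonh5-based trees. A minimal sketch of the default behaviour (an illustrative in-memory dataset)::

    import numpy
    from silx.io import commonh5

    dset = commonh5.Dataset("data", data=numpy.arange(5))
    assert dset.is_virtual is False  # commonh5 datasets are never virtual
    assert dset.external is None     # and have no external sources
    try:
        dset.virtual_sources()
    except RuntimeError:
        pass  # raised for non-virtual datasets, matching h5py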
diff --git a/silx/io/dictdump.py b/silx/io/dictdump.py
index f2318e0..bbb244a 100644
--- a/silx/io/dictdump.py
+++ b/silx/io/dictdump.py
@@ -34,9 +34,11 @@ import sys
import h5py
from .configdict import ConfigDict
-from .utils import is_group
+from .utils import is_group, is_link, is_softlink, is_externallink
from .utils import is_file as is_h5_file_like
from .utils import open as h5open
+from .utils import h5py_read_dataset
+from .utils import H5pyAttributesReadWrapper
__authors__ = ["P. Knobel"]
__license__ = "MIT"
@@ -44,35 +46,24 @@ __date__ = "17/07/2018"
logger = logging.getLogger(__name__)
-string_types = (basestring,) if sys.version_info[0] == 2 else (str,) # noqa
+vlen_utf8 = h5py.special_dtype(vlen=str)
+vlen_bytes = h5py.special_dtype(vlen=bytes)
-def _prepare_hdf5_dataset(array_like):
+def _prepare_hdf5_write_value(array_like):
"""Cast a python object into a numpy array in a HDF5 friendly format.
:param array_like: Input dataset in a type that can be digested by
``numpy.array()`` (`str`, `list`, `numpy.ndarray`…)
:return: ``numpy.ndarray`` ready to be written as an HDF5 dataset
"""
- # simple strings
- if isinstance(array_like, string_types):
- array_like = numpy.string_(array_like)
-
- # Ensure our data is a numpy.ndarray
- if not isinstance(array_like, (numpy.ndarray, numpy.string_)):
- array = numpy.array(array_like)
+ array = numpy.asarray(array_like)
+ if numpy.issubdtype(array.dtype, numpy.bytes_):
+ return numpy.array(array_like, dtype=vlen_bytes)
+ elif numpy.issubdtype(array.dtype, numpy.str_):
+ return numpy.array(array_like, dtype=vlen_utf8)
else:
- array = array_like
-
- # handle list of strings or numpy array of strings
- if not isinstance(array, numpy.string_):
- data_kind = array.dtype.kind
- # unicode: convert to byte strings
- # (http://docs.h5py.org/en/latest/strings.html)
- if data_kind.lower() in ["s", "u"]:
- array = numpy.asarray(array, dtype=numpy.string_)
-
- return array
+ return array
class _SafeH5FileWrite(object):
@@ -219,150 +210,145 @@ def dicttoh5(treedict, h5file, h5path='/',
h5f.create_group(h5path)
for key in filter(lambda k: not isinstance(k, tuple), treedict):
- if isinstance(treedict[key], dict) and len(treedict[key]):
+ key_is_group = isinstance(treedict[key], dict)
+ h5name = h5path + key
+
+ if key_is_group and treedict[key]:
# non-empty group: recurse
- dicttoh5(treedict[key], h5f, h5path + key,
+ dicttoh5(treedict[key], h5f, h5name,
overwrite_data=overwrite_data,
create_dataset_args=create_dataset_args)
+ continue
- elif treedict[key] is None or (isinstance(treedict[key], dict) and
- not len(treedict[key])):
- if (h5path + key) in h5f:
- if overwrite_data is True:
- del h5f[h5path + key]
- else:
- logger.warning('key (%s) already exists. '
- 'Not overwriting.' % (h5path + key))
- continue
- # Create empty group
- h5f.create_group(h5path + key)
+ if h5name in h5f:
+ # key already exists: delete or skip
+ if overwrite_data is True:
+ del h5f[h5name]
+ else:
+ logger.warning('key (%s) already exists. '
+ 'Not overwriting.' % (h5name))
+ continue
+
+ value = treedict[key]
+ if value is None or key_is_group:
+ # Create empty group
+ h5f.create_group(h5name)
+ elif is_link(value):
+ h5f[h5name] = value
else:
- ds = _prepare_hdf5_dataset(treedict[key])
+ data = _prepare_hdf5_write_value(value)
# can't apply filters on scalars (datasets with shape == () )
- if ds.shape == () or create_dataset_args is None:
- if h5path + key in h5f:
- if overwrite_data is True:
- del h5f[h5path + key]
- else:
- logger.warning('key (%s) already exists. '
- 'Not overwriting.' % (h5path + key))
- continue
-
- h5f.create_dataset(h5path + key,
- data=ds)
+ if data.shape == () or create_dataset_args is None:
+ h5f.create_dataset(h5name,
+ data=data)
else:
- if h5path + key in h5f:
- if overwrite_data is True:
- del h5f[h5path + key]
- else:
- logger.warning('key (%s) already exists. '
- 'Not overwriting.' % (h5path + key))
- continue
-
- h5f.create_dataset(h5path + key,
- data=ds,
+ h5f.create_dataset(h5name,
+ data=data,
**create_dataset_args)
# deal with h5 attributes which have tuples as keys in treedict
for key in filter(lambda k: isinstance(k, tuple), treedict):
- if (h5path + key[0]) not in h5f:
+ assert len(key) == 2, "attribute must be defined by 2 values"
+ h5name = h5path + key[0]
+ attr_name = key[1]
+
+ if h5name not in h5f:
# Create empty group if key for attr does not exist
- h5f.create_group(h5path + key[0])
+ h5f.create_group(h5name)
logger.warning(
"key (%s) does not exist. attr %s "
- "will be written to ." % (h5path + key[0], key[1])
+ "will be written to ." % (h5name, attr_name)
)
- if key[1] in h5f[h5path + key[0]].attrs:
+ if attr_name in h5f[h5name].attrs:
if not overwrite_data:
logger.warning(
"attribute %s@%s already exists. Not overwriting."
- "" % (h5path + key[0], key[1])
+ "" % (h5name, attr_name)
)
continue
# Write attribute
value = treedict[key]
+ data = _prepare_hdf5_write_value(value)
+ h5f[h5name].attrs[attr_name] = data
- # Makes list/tuple of str being encoded as vlen unicode array
- # Workaround for h5py<2.9.0 (e.g. debian 10).
- if (isinstance(value, (list, tuple)) and
- numpy.asarray(value).dtype.type == numpy.unicode_):
- value = numpy.array(value, dtype=h5py.special_dtype(vlen=str))
-
- h5f[h5path + key[0]].attrs[key[1]] = value
-
-def dicttonx(
- treedict,
- h5file,
- h5path="/",
- mode="w",
- overwrite_data=False,
- create_dataset_args=None,
-):
- """
- Write a nested dictionary to a HDF5 file, using string keys as member names.
- The NeXus convention is used to identify attributes with ``"@"`` character,
- therefor the dataset_names should not contain ``"@"``.
+def nexus_to_h5_dict(treedict, parents=tuple()):
+ """The following conversions are applied:
+ * key with "{name}@{attr_name}" notation: key converted to 2-tuple
+ * key with ">{url}" notation: strip ">" and convert value to
+ h5py.SoftLink or h5py.ExternalLink
:param treedict: Nested dictionary/tree structure with strings as keys
and array-like objects as leafs. The ``"/"`` character can be used
to define sub tree. The ``"@"`` character is used to write attributes.
+ The ``">"`` prefix is used to define links.
+ :param parents: Needed to resolve up-links (tuple of HDF5 group names)
- Detais on all other params can be found in doc of dicttoh5.
+ :rtype: dict
+ """
+ copy = dict()
+ for key, value in treedict.items():
+ if "@" in key:
+ key = tuple(key.rsplit("@", 1))
+ elif key.startswith(">"):
+ if isinstance(value, str):
+ key = key[1:]
+ first, sep, second = value.partition("::")
+ if sep:
+ value = h5py.ExternalLink(first, second)
+ else:
+ if ".." in first:
+ # Up-links not supported: make absolute
+ parts = []
+ for p in list(parents) + first.split("/"):
+ if not p or p == ".":
+ continue
+ elif p == "..":
+ parts.pop(-1)
+ else:
+ parts.append(p)
+ first = "/" + "/".join(parts)
+ value = h5py.SoftLink(first)
+ elif is_link(value):
+ key = key[1:]
+ if isinstance(value, dict):
+ copy[key] = nexus_to_h5_dict(value, parents=parents+(key,))
+ else:
+ copy[key] = value
+ return copy
- Example::
- import numpy
- from silx.io.dictdump import dicttonx
+def h5_to_nexus_dict(treedict):
+ """The following conversions are applied:
+ * 2-tuple key: converted to string ("@" notation)
+ * h5py.Softlink value: converted to string (">" key prefix)
+ * h5py.ExternalLink value: converted to string (">" key prefix)
- gauss = {
- "entry":{
- "title":u"A plot of a gaussian",
- "plot": {
- "y": numpy.array([0.08, 0.19, 0.39, 0.66, 0.9, 1.,
- 0.9, 0.66, 0.39, 0.19, 0.08]),
- "x": numpy.arange(0,1.1,.1),
- "@signal": "y",
- "@axes": "x",
- "@NX_class":u"NXdata",
- "title:u"Gauss Plot",
- },
- "@NX_class":u"NXentry",
- "default":"plot",
- }
- "@NX_class": u"NXroot",
- "@default": "entry",
- }
+ :param treedict: Nested dictionary/tree structure with strings as keys
+ and array-like objects as leafs. The ``"/"`` character can be used
+ to define sub tree.
- dicttonx(gauss,"test.h5")
+ :rtype: dict
"""
-
- def copy_keys_keep_values(original):
- # create a new treedict with with modified keys but keep values
- copy = dict()
- for key, value in original.items():
- if "@" in key:
- newkey = tuple(key.rsplit("@", 1))
- else:
- newkey = key
- if isinstance(value, dict):
- copy[newkey] = copy_keys_keep_values(value)
- else:
- copy[newkey] = value
- return copy
-
- nxtreedict = copy_keys_keep_values(treedict)
- dicttoh5(
- nxtreedict,
- h5file,
- h5path=h5path,
- mode=mode,
- overwrite_data=overwrite_data,
- create_dataset_args=create_dataset_args,
- )
+ copy = dict()
+ for key, value in treedict.items():
+ if isinstance(key, tuple):
+ assert len(key) == 2, "attribute must be defined by 2 values"
+ key = "%s@%s" % (key[0], key[1])
+ elif is_softlink(value):
+ key = ">" + key
+ value = value.path
+ elif is_externallink(value):
+ key = ">" + key
+ value = value.filename + "::" + value.path
+ if isinstance(value, dict):
+ copy[key] = h5_to_nexus_dict(value)
+ else:
+ copy[key] = value
+ return copy
def _name_contains_string_in_list(name, strlist):
@@ -374,7 +360,31 @@ def _name_contains_string_in_list(name, strlist):
return False
-def h5todict(h5file, path="/", exclude_names=None, asarray=True):
+def _handle_error(mode: str, exception, msg: str, *args) -> None:
+ """Handle errors.
+
+ :param str mode: 'raise', 'log', 'ignore'
+ :param type exception: Exception class to use in 'raise' mode
+ :param str msg: Error message template
+ :param List[str] args: Arguments for error message template
+ """
+ if mode == 'ignore':
+ return # no-op
+ elif mode == 'log':
+ logger.error(msg, *args)
+ elif mode == 'raise':
+ raise exception(msg % args)
+ else:
+ raise ValueError("Unsupported error handling: %s" % mode)
+
+
+def h5todict(h5file,
+ path="/",
+ exclude_names=None,
+ asarray=True,
+ dereference_links=True,
+ include_attributes=False,
+ errors='raise'):
"""Read a HDF5 file and return a nested dictionary with the complete file
structure and all data.
@@ -397,7 +407,7 @@ def h5todict(h5file, path="/", exclude_names=None, asarray=True):
.. note:: This function requires `h5py <http://www.h5py.org/>`_ to be
installed.
- .. note:: If you write a dictionary to a HDF5 file with
+ .. note:: If you write a dictionary to a HDF5 file with
:func:`dicttoh5` and then read it back with :func:`h5todict`, data
types are not preserved. All values are cast to numpy arrays before
being written to file, and they are read back as numpy arrays (or
@@ -412,28 +422,159 @@ def h5todict(h5file, path="/", exclude_names=None, asarray=True):
a string in this list will be ignored. Default is None (ignore nothing)
:param bool asarray: True (default) to read scalar as arrays, False to
read them as scalar
+ :param bool dereference_links: True (default) to dereference links, False
+ to preserve the link itself
+ :param bool include_attributes: True to read HDF5 attributes as well,
+ stored under 2-tuple keys ``(name, attribute_name)``. Default is False.
+ :param str errors: Handling of errors (HDF5 access issue, broken link,...):
+ - 'raise' (default): Raise an exception
+ - 'log': Log as errors
+ - 'ignore': Ignore errors
:return: Nested dictionary
"""
with _SafeH5FileRead(h5file) as h5f:
ddict = {}
- for key in h5f[path]:
+ if path not in h5f:
+ _handle_error(
+ errors, KeyError, 'Path "%s" does not exist in file.', path)
+ return ddict
+
+ try:
+ root = h5f[path]
+ except KeyError as e:
+ if not isinstance(h5f.get(path, getlink=True), h5py.HardLink):
+ _handle_error(errors,
+ KeyError,
+ 'Cannot retrieve path "%s" (broken link)',
+ path)
+ else:
+ _handle_error(errors, KeyError, ', '.join(e.args))
+ return ddict
+
+ # Read the attributes of the group
+ if include_attributes:
+ attrs = H5pyAttributesReadWrapper(root.attrs)
+ for aname, avalue in attrs.items():
+ ddict[("", aname)] = avalue
+ # Read the children of the group
+ for key in root:
if _name_contains_string_in_list(key, exclude_names):
continue
- if is_group(h5f[path + "/" + key]):
+ h5name = path + "/" + key
+ # Preserve HDF5 link when requested
+ if not dereference_links:
+ lnk = h5f.get(h5name, getlink=True)
+ if is_link(lnk):
+ ddict[key] = lnk
+ continue
+
+ try:
+ h5obj = h5f[h5name]
+ except KeyError as e:
+ if not isinstance(h5f.get(h5name, getlink=True), h5py.HardLink):
+ _handle_error(errors,
+ KeyError,
+ 'Cannot retrieve path "%s" (broken link)',
+ h5name)
+ else:
+ _handle_error(errors, KeyError, ', '.join(e.args))
+ continue
+
+ if is_group(h5obj):
+ # Child is an HDF5 group
ddict[key] = h5todict(h5f,
- path + "/" + key,
+ h5name,
exclude_names=exclude_names,
- asarray=asarray)
+ asarray=asarray,
+ dereference_links=dereference_links,
+ include_attributes=include_attributes)
else:
- # Read HDF5 datset
- data = h5f[path + "/" + key][()]
- if asarray: # Convert HDF5 dataset to numpy array
- data = numpy.array(data, copy=False)
- ddict[key] = data
-
+ # Child is an HDF5 dataset
+ try:
+ data = h5py_read_dataset(h5obj)
+ except OSError:
+ _handle_error(errors,
+ OSError,
+ 'Cannot retrieve dataset "%s"',
+ h5name)
+ else:
+ if asarray: # Convert HDF5 dataset to numpy array
+ data = numpy.array(data, copy=False)
+ ddict[key] = data
+ # Read the attributes of the child
+ if include_attributes:
+ attrs = H5pyAttributesReadWrapper(h5obj.attrs)
+ for aname, avalue in attrs.items():
+ ddict[(key, aname)] = avalue
return ddict
+def dicttonx(treedict, h5file, h5path="/", **kw):
+ """
+ Write a nested dictionary to a HDF5 file, using string keys as member names.
+ The NeXus convention is used to identify attributes with ``"@"`` character,
+ therefore the dataset_names should not contain ``"@"``.
+
+ Similarly, links are identified by keys starting with the ``">"`` character.
+ The corresponding value can be a soft or external link.
+
+ :param treedict: Nested dictionary/tree structure with strings as keys
+ and array-like objects as leafs. The ``"/"`` character can be used
+ to define sub tree. The ``"@"`` character is used to write attributes.
+ The ``">"`` prefix is used to define links.
+
+ The named parameters are passed to dicttoh5.
+
+ Example::
+
+ import numpy
+ from silx.io.dictdump import dicttonx
+
+ gauss = {
+ "entry":{
+ "title":u"A plot of a gaussian",
+ "instrument": {
+ "@NX_class": u"NXinstrument",
+ "positioners": {
+ "@NX_class": u"NXCollection",
+ "x": numpy.arange(0,1.1,.1)
+ }
+ },
+ "plot": {
+ "y": numpy.array([0.08, 0.19, 0.39, 0.66, 0.9, 1.,
+ 0.9, 0.66, 0.39, 0.19, 0.08]),
+ ">x": "../instrument/positioners/x",
+ "@signal": "y",
+ "@axes": "x",
+ "@NX_class":u"NXdata",
+ "title:u"Gauss Plot",
+ },
+ "@NX_class": u"NXentry",
+ "default":"plot",
+ },
+ "@NX_class": u"NXroot",
+ "@default": "entry",
+ }
+
+ dicttonx(gauss,"test.h5")
+ """
+ parents = tuple(p for p in h5path.split("/") if p)
+ nxtreedict = nexus_to_h5_dict(treedict, parents=parents)
+ dicttoh5(nxtreedict, h5file, h5path=h5path, **kw)
+
+
+def nxtodict(h5file, **kw):
+ """Read a HDF5 file and return a nested dictionary with the complete file
+ structure and all data.
+
+ As opposed to h5todict, all keys will be strings and no h5py objects are
+ present in the tree.
+
+ The named parameters are passed to h5todict.
+ """
+ nxtreedict = h5todict(h5file, **kw)
+ return h5_to_nexus_dict(nxtreedict)
+
+
def dicttojson(ddict, jsonfile, indent=None, mode="w"):
"""Serialize ``ddict`` as a JSON formatted stream to ``jsonfile``.
diff --git a/silx/io/fabioh5.py b/silx/io/fabioh5.py
index cfaa0a0..2fd719d 100755
--- a/silx/io/fabioh5.py
+++ b/silx/io/fabioh5.py
@@ -1,6 +1,6 @@
# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2019 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2020 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -656,13 +656,13 @@ class FabioReader(object):
elif result_type.kind == "U":
none_value = u""
elif result_type.kind == "f":
- none_value = numpy.float("NaN")
+ none_value = numpy.float64("NaN")
elif result_type.kind == "i":
- none_value = numpy.int(0)
+ none_value = numpy.int64(0)
elif result_type.kind == "u":
- none_value = numpy.int(0)
+ none_value = numpy.int64(0)
elif result_type.kind == "b":
- none_value = numpy.bool(False)
+ none_value = numpy.bool_(False)
else:
none_value = None
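`numpy.float`, `numpy.int` and `numpy.bool` were deprecated aliases of the Python builtins (removed in numpy 1.24); the explicit scalar types substituted here yield the same sentinel values. For illustration::

    import numpy

    assert numpy.isnan(numpy.float64("NaN"))  # replaces numpy.float("NaN")
    assert numpy.int64(0) == 0                # replaces numpy.int(0)
    assert not numpy.bool_(False)             # replaces numpy.bool(False)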
diff --git a/silx/io/nxdata/parse.py b/silx/io/nxdata/parse.py
index 6bd18d6..b1c1bba 100644
--- a/silx/io/nxdata/parse.py
+++ b/silx/io/nxdata/parse.py
@@ -45,7 +45,7 @@ import json
import numpy
import six
-from silx.io.utils import is_group, is_file, is_dataset
+from silx.io.utils import is_group, is_file, is_dataset, h5py_read_dataset
from ._utils import get_attr_as_unicode, INTERPDIM, nxdata_logger, \
get_uncertainties_names, get_signal_name, \
@@ -628,7 +628,7 @@ class NXdata(object):
data_dataset_names = [self.signal_name] + self.axes_dataset_names
if (title is not None and is_dataset(title) and
"title" not in data_dataset_names):
- return str(title[()])
+ return str(h5py_read_dataset(title))
title = self.group.attrs.get("title")
if title is None:
diff --git a/silx/io/setup.py b/silx/io/setup.py
index 4aaf324..9cafa17 100644
--- a/silx/io/setup.py
+++ b/silx/io/setup.py
@@ -51,7 +51,7 @@ else:
SPECFILE_USE_GNU_SOURCE = int(SPECFILE_USE_GNU_SOURCE)
if sys.platform == "win32":
- define_macros = [('WIN32', None)]
+ define_macros = [('WIN32', None), ('SPECFILE_POSIX', None)]
elif os.name.lower().startswith('posix'):
define_macros = [('SPECFILE_POSIX', None)]
# the best choice is to have _GNU_SOURCE defined
diff --git a/silx/io/specfile/src/locale_management.c b/silx/io/specfile/src/locale_management.c
index 54695f5..0c5f7ca 100644
--- a/silx/io/specfile/src/locale_management.c
+++ b/silx/io/specfile/src/locale_management.c
@@ -39,6 +39,9 @@
# else
# ifdef SPECFILE_POSIX
# include <locale.h>
+# ifndef LOCALE_NAME_MAX_LENGTH
+# define LOCALE_NAME_MAX_LENGTH 85
+# endif
# endif
# endif
#endif
@@ -60,7 +63,7 @@ double PyMcaAtof(const char * inputString)
#else
#ifdef SPECFILE_POSIX
char *currentLocaleBuffer;
- char localeBuffer[21];
+ char localeBuffer[LOCALE_NAME_MAX_LENGTH + 1] = {'\0'};
double result;
currentLocaleBuffer = setlocale(LC_NUMERIC, NULL);
strcpy(localeBuffer, currentLocaleBuffer);
diff --git a/silx/io/test/test_dictdump.py b/silx/io/test/test_dictdump.py
index c0b6914..b99116b 100644
--- a/silx/io/test/test_dictdump.py
+++ b/silx/io/test/test_dictdump.py
@@ -43,6 +43,8 @@ from .. import dictdump
from ..dictdump import dicttoh5, dicttojson, dump
from ..dictdump import h5todict, load
from ..dictdump import logger as dictdump_logger
+from ..utils import is_link
+from ..utils import h5py_read_dataset
def tree():
@@ -58,15 +60,29 @@ city_attrs["Europe"]["France"]["Grenoble"]["inhabitants"] = inhabitants
city_attrs["Europe"]["France"]["Grenoble"]["coordinates"] = [45.1830, 5.7196]
city_attrs["Europe"]["France"]["Tourcoing"]["area"]
+ext_attrs = tree()
+ext_attrs["ext_group"]["dataset"] = 10
+ext_filename = "ext.h5"
+
+link_attrs = tree()
+link_attrs["links"]["group"]["dataset"] = 10
+link_attrs["links"]["group"]["relative_softlink"] = h5py.SoftLink("dataset")
+link_attrs["links"]["relative_softlink"] = h5py.SoftLink("group/dataset")
+link_attrs["links"]["absolute_softlink"] = h5py.SoftLink("/links/group/dataset")
+link_attrs["links"]["external_link"] = h5py.ExternalLink(ext_filename, "/ext_group/dataset")
+
class TestDictToH5(unittest.TestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5")
+ self.h5_ext_fname = os.path.join(self.tempdir, ext_filename)
def tearDown(self):
if os.path.exists(self.h5_fname):
os.unlink(self.h5_fname)
+ if os.path.exists(self.h5_ext_fname):
+ os.unlink(self.h5_ext_fname)
os.rmdir(self.tempdir)
def testH5CityAttrs(self):
@@ -201,31 +217,129 @@ class TestDictToH5(unittest.TestCase):
self.assertEqual(h5file["group/group/dataset"].attrs['attr'], 11)
self.assertEqual(h5file["group/group"].attrs['attr'], 12)
+ def testLinks(self):
+ with h5py.File(self.h5_ext_fname, "w") as h5file:
+ dictdump.dicttoh5(ext_attrs, h5file)
+ with h5py.File(self.h5_fname, "w") as h5file:
+ dictdump.dicttoh5(link_attrs, h5file)
+ with h5py.File(self.h5_fname, "r") as h5file:
+ self.assertEqual(h5file["links/group/dataset"][()], 10)
+ self.assertEqual(h5file["links/group/relative_softlink"][()], 10)
+ self.assertEqual(h5file["links/relative_softlink"][()], 10)
+ self.assertEqual(h5file["links/absolute_softlink"][()], 10)
+ self.assertEqual(h5file["links/external_link"][()], 10)
+
+ def testDumpNumpyArray(self):
+ ddict = {
+ 'darks': {
+ '0': numpy.array([[0, 0, 0], [0, 0, 0]], dtype=numpy.uint16)
+ }
+ }
+ with h5py.File(self.h5_fname, "w") as h5file:
+ dictdump.dicttoh5(ddict, h5file)
+ with h5py.File(self.h5_fname, "r") as h5file:
+ numpy.testing.assert_array_equal(h5py_read_dataset(h5file["darks"]["0"]),
+ ddict['darks']['0'])
+
+
+class TestH5ToDict(unittest.TestCase):
+ def setUp(self):
+ self.tempdir = tempfile.mkdtemp()
+ self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5")
+ self.h5_ext_fname = os.path.join(self.tempdir, ext_filename)
+ dicttoh5(city_attrs, self.h5_fname)
+ dicttoh5(link_attrs, self.h5_fname, mode="a")
+ dicttoh5(ext_attrs, self.h5_ext_fname)
+
+ def tearDown(self):
+ if os.path.exists(self.h5_fname):
+ os.unlink(self.h5_fname)
+ if os.path.exists(self.h5_ext_fname):
+ os.unlink(self.h5_ext_fname)
+ os.rmdir(self.tempdir)
+
+ def testExcludeNames(self):
+ ddict = h5todict(self.h5_fname, path="/Europe/France",
+ exclude_names=["ourcoing", "inhab", "toto"])
+ self.assertNotIn("Tourcoing", ddict)
+ self.assertIn("Grenoble", ddict)
+
+ self.assertNotIn("inhabitants", ddict["Grenoble"])
+ self.assertIn("coordinates", ddict["Grenoble"])
+ self.assertIn("area", ddict["Grenoble"])
+
+ def testAsArrayTrue(self):
+ """Test with asarray=True, the default"""
+ ddict = h5todict(self.h5_fname, path="/Europe/France/Grenoble")
+ self.assertTrue(numpy.array_equal(ddict["inhabitants"], numpy.array(inhabitants)))
+
+ def testAsArrayFalse(self):
+ """Test with asarray=False"""
+ ddict = h5todict(self.h5_fname, path="/Europe/France/Grenoble", asarray=False)
+ self.assertEqual(ddict["inhabitants"], inhabitants)
+
+ def testDereferenceLinks(self):
+ ddict = h5todict(self.h5_fname, path="links", dereference_links=True)
+ self.assertTrue(ddict["absolute_softlink"], 10)
+ self.assertTrue(ddict["relative_softlink"], 10)
+ self.assertTrue(ddict["external_link"], 10)
+ self.assertTrue(ddict["group"]["relative_softlink"], 10)
+
+ def testPreserveLinks(self):
+ ddict = h5todict(self.h5_fname, path="links", dereference_links=False)
+ self.assertTrue(is_link(ddict["absolute_softlink"]))
+ self.assertTrue(is_link(ddict["relative_softlink"]))
+ self.assertTrue(is_link(ddict["external_link"]))
+ self.assertTrue(is_link(ddict["group"]["relative_softlink"]))
+
+ def testStrings(self):
+ ddict = {"dset_bytes": b"bytes",
+ "dset_utf8": "utf8",
+ "dset_2bytes": [b"bytes", b"bytes"],
+ "dset_2utf8": ["utf8", "utf8"],
+ ("", "attr_bytes"): b"bytes",
+ ("", "attr_utf8"): "utf8",
+ ("", "attr_2bytes"): [b"bytes", b"bytes"],
+ ("", "attr_2utf8"): ["utf8", "utf8"]}
+ dicttoh5(ddict, self.h5_fname, mode="w")
+ adict = h5todict(self.h5_fname, include_attributes=True, asarray=False)
+ self.assertEqual(ddict["dset_bytes"], adict["dset_bytes"])
+ self.assertEqual(ddict["dset_utf8"], adict["dset_utf8"])
+ self.assertEqual(ddict[("", "attr_bytes")], adict[("", "attr_bytes")])
+ self.assertEqual(ddict[("", "attr_utf8")], adict[("", "attr_utf8")])
+ numpy.testing.assert_array_equal(ddict["dset_2bytes"], adict["dset_2bytes"])
+ numpy.testing.assert_array_equal(ddict["dset_2utf8"], adict["dset_2utf8"])
+ numpy.testing.assert_array_equal(ddict[("", "attr_2bytes")], adict[("", "attr_2bytes")])
+ numpy.testing.assert_array_equal(ddict[("", "attr_2utf8")], adict[("", "attr_2utf8")])
+
class TestDictToNx(unittest.TestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.h5_fname = os.path.join(self.tempdir, "nx.h5")
+ self.h5_ext_fname = os.path.join(self.tempdir, "nx_ext.h5")
def tearDown(self):
if os.path.exists(self.h5_fname):
os.unlink(self.h5_fname)
+ if os.path.exists(self.h5_ext_fname):
+ os.unlink(self.h5_ext_fname)
os.rmdir(self.tempdir)
def testAttributes(self):
"""Any kind of attribute can be described"""
ddict = {
- "group": {"datatset": "hmmm", "@group_attr": 10},
- "dataset": "aaaaaaaaaaaaaaa",
+ "group": {"dataset": 100, "@group_attr1": 10},
+ "dataset": 200,
"@root_attr": 11,
- "dataset@dataset_attr": 12,
+ "dataset@dataset_attr": "12",
"group@group_attr2": 13,
}
with h5py.File(self.h5_fname, "w") as h5file:
dictdump.dicttonx(ddict, h5file)
- self.assertEqual(h5file["group"].attrs['group_attr'], 10)
+ self.assertEqual(h5file["group"].attrs['group_attr1'], 10)
self.assertEqual(h5file.attrs['root_attr'], 11)
- self.assertEqual(h5file["dataset"].attrs['dataset_attr'], 12)
+ self.assertEqual(h5file["dataset"].attrs['dataset_attr'], "12")
self.assertEqual(h5file["group"].attrs['group_attr2'], 13)
def testKeyOrder(self):
@@ -280,36 +394,120 @@ class TestDictToNx(unittest.TestCase):
self.assertEqual(h5file["group/group/dataset"].attrs['attr'], 11)
self.assertEqual(h5file["group/group"].attrs['attr'], 12)
-
-class TestH5ToDict(unittest.TestCase):
+ def testLinks(self):
+ ddict = {"ext_group": {"dataset": 10}}
+ dictdump.dicttonx(ddict, self.h5_ext_fname)
+ ddict = {"links": {"group": {"dataset": 10, ">relative_softlink": "dataset"},
+ ">relative_softlink": "group/dataset",
+ ">absolute_softlink": "/links/group/dataset",
+ ">external_link": "nx_ext.h5::/ext_group/dataset"}}
+ dictdump.dicttonx(ddict, self.h5_fname)
+ with h5py.File(self.h5_fname, "r") as h5file:
+ self.assertEqual(h5file["links/group/dataset"][()], 10)
+ self.assertEqual(h5file["links/group/relative_softlink"][()], 10)
+ self.assertEqual(h5file["links/relative_softlink"][()], 10)
+ self.assertEqual(h5file["links/absolute_softlink"][()], 10)
+ self.assertEqual(h5file["links/external_link"][()], 10)
+
+ def testUpLinks(self):
+ ddict = {"data": {"group": {"dataset": 10, ">relative_softlink": "dataset"}},
+ "links": {"group": {"subgroup": {">relative_softlink": "../../../data/group/dataset"}}}}
+ dictdump.dicttonx(ddict, self.h5_fname)
+ with h5py.File(self.h5_fname, "r") as h5file:
+ self.assertEqual(h5file["/links/group/subgroup/relative_softlink"][()], 10)
+
+
+class TestNxToDict(unittest.TestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
- self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5")
- dicttoh5(city_attrs, self.h5_fname)
+ self.h5_fname = os.path.join(self.tempdir, "nx.h5")
+ self.h5_ext_fname = os.path.join(self.tempdir, "nx_ext.h5")
def tearDown(self):
- os.unlink(self.h5_fname)
+ if os.path.exists(self.h5_fname):
+ os.unlink(self.h5_fname)
+ if os.path.exists(self.h5_ext_fname):
+ os.unlink(self.h5_ext_fname)
os.rmdir(self.tempdir)
- def testExcludeNames(self):
- ddict = h5todict(self.h5_fname, path="/Europe/France",
- exclude_names=["ourcoing", "inhab", "toto"])
- self.assertNotIn("Tourcoing", ddict)
- self.assertIn("Grenoble", ddict)
-
- self.assertNotIn("inhabitants", ddict["Grenoble"])
- self.assertIn("coordinates", ddict["Grenoble"])
- self.assertIn("area", ddict["Grenoble"])
-
- def testAsArrayTrue(self):
- """Test with asarray=True, the default"""
- ddict = h5todict(self.h5_fname, path="/Europe/France/Grenoble")
- self.assertTrue(numpy.array_equal(ddict["inhabitants"], numpy.array(inhabitants)))
-
- def testAsArrayFalse(self):
- """Test with asarray=False"""
- ddict = h5todict(self.h5_fname, path="/Europe/France/Grenoble", asarray=False)
- self.assertEqual(ddict["inhabitants"], inhabitants)
+ def testAttributes(self):
+ """Any kind of attribute can be described"""
+ ddict = {
+ "group": {"dataset": 100, "@group_attr1": 10},
+ "dataset": 200,
+ "@root_attr": 11,
+ "dataset@dataset_attr": "12",
+ "group@group_attr2": 13,
+ }
+ dictdump.dicttonx(ddict, self.h5_fname)
+ ddict = dictdump.nxtodict(self.h5_fname, include_attributes=True)
+ self.assertEqual(ddict["group"]["@group_attr1"], 10)
+ self.assertEqual(ddict["@root_attr"], 11)
+ self.assertEqual(ddict["dataset@dataset_attr"], "12")
+ self.assertEqual(ddict["group"]["@group_attr2"], 13)
+
+ def testDereferenceLinks(self):
+ """Write links and dereference on read"""
+ ddict = {"ext_group": {"dataset": 10}}
+ dictdump.dicttonx(ddict, self.h5_ext_fname)
+ ddict = {"links": {"group": {"dataset": 10, ">relative_softlink": "dataset"},
+ ">relative_softlink": "group/dataset",
+ ">absolute_softlink": "/links/group/dataset",
+ ">external_link": "nx_ext.h5::/ext_group/dataset"}}
+ dictdump.dicttonx(ddict, self.h5_fname)
+
+ ddict = dictdump.h5todict(self.h5_fname, dereference_links=True)
+ self.assertTrue(ddict["links"]["absolute_softlink"], 10)
+ self.assertTrue(ddict["links"]["relative_softlink"], 10)
+ self.assertTrue(ddict["links"]["external_link"], 10)
+ self.assertTrue(ddict["links"]["group"]["relative_softlink"], 10)
+
+ def testPreserveLinks(self):
+ """Write/read links"""
+ ddict = {"ext_group": {"dataset": 10}}
+ dictdump.dicttonx(ddict, self.h5_ext_fname)
+ ddict = {"links": {"group": {"dataset": 10, ">relative_softlink": "dataset"},
+ ">relative_softlink": "group/dataset",
+ ">absolute_softlink": "/links/group/dataset",
+ ">external_link": "nx_ext.h5::/ext_group/dataset"}}
+ dictdump.dicttonx(ddict, self.h5_fname)
+
+ ddict = dictdump.nxtodict(self.h5_fname, dereference_links=False)
+ self.assertTrue(ddict["links"][">absolute_softlink"], "dataset")
+ self.assertTrue(ddict["links"][">relative_softlink"], "group/dataset")
+ self.assertTrue(ddict["links"][">external_link"], "/links/group/dataset")
+ self.assertTrue(ddict["links"]["group"][">relative_softlink"], "nx_ext.h5::/ext_group/datase")
+
+ def testNotExistingPath(self):
+ """Test converting not existing path"""
+ with h5py.File(self.h5_fname, 'a') as f:
+ f['data'] = 1
+
+ ddict = h5todict(self.h5_fname, path="/I/am/not/a/path", errors='ignore')
+ self.assertFalse(ddict)
+
+ with TestLogging(dictdump_logger, error=1):
+ ddict = h5todict(self.h5_fname, path="/I/am/not/a/path", errors='log')
+ self.assertFalse(ddict)
+
+ with self.assertRaises(KeyError):
+ h5todict(self.h5_fname, path="/I/am/not/a/path", errors='raise')
+
+ def testBrokenLinks(self):
+ """Test with broken links"""
+ with h5py.File(self.h5_fname, 'a') as f:
+ f["/Mars/BrokenSoftLink"] = h5py.SoftLink("/Idontexists")
+ f["/Mars/BrokenExternalLink"] = h5py.ExternalLink("notexistingfile.h5", "/Idontexists")
+
+ ddict = h5todict(self.h5_fname, path="/Mars", errors='ignore')
+ self.assertFalse(ddict)
+
+ with TestLogging(dictdump_logger, error=2):
+ ddict = h5todict(self.h5_fname, path="/Mars", errors='log')
+ self.assertFalse(ddict)
+
+ with self.assertRaises(KeyError):
+ h5todict(self.h5_fname, path="/Mars", errors='raise')
class TestDictToJson(unittest.TestCase):
@@ -436,6 +634,7 @@ def suite():
test_suite.addTest(loadTests(TestDictToNx))
test_suite.addTest(loadTests(TestDictToJson))
test_suite.addTest(loadTests(TestH5ToDict))
+ test_suite.addTest(loadTests(TestNxToDict))
return test_suite
diff --git a/silx/io/test/test_spectoh5.py b/silx/io/test/test_spectoh5.py
index c3f03e9..903a62c 100644
--- a/silx/io/test/test_spectoh5.py
+++ b/silx/io/test/test_spectoh5.py
@@ -33,6 +33,7 @@ import h5py
from ..spech5 import SpecH5, SpecH5Group
from ..convert import convert, write_to_h5
+from ..utils import h5py_read_dataset
__authors__ = ["P. Knobel"]
__license__ = "MIT"
@@ -129,7 +130,7 @@ class TestConvertSpecHDF5(unittest.TestCase):
def testTitle(self):
"""Test the value of a dataset"""
- title12 = self.h5f["/1.2/title"][()]
+ title12 = h5py_read_dataset(self.h5f["/1.2/title"])
self.assertEqual(title12,
u"aaaaaa")
diff --git a/silx/io/test/test_url.py b/silx/io/test/test_url.py
index e68c67a..114f6a7 100644
--- a/silx/io/test/test_url.py
+++ b/silx/io/test/test_url.py
@@ -152,6 +152,16 @@ class TestDataUrl(unittest.TestCase):
expected = [True, True, None, "/a.h5", "/b", (5, 1)]
self.assertUrl(url, expected)
+ def test_slice2(self):
+ url = DataUrl("/a.h5?path=/b&slice=2:5")
+ expected = [True, True, None, "/a.h5", "/b", (slice(2, 5),)]
+ self.assertUrl(url, expected)
+
+ def test_slice3(self):
+ url = DataUrl("/a.h5?path=/b&slice=::2")
+ expected = [True, True, None, "/a.h5", "/b", (slice(None, None, 2),)]
+ self.assertUrl(url, expected)
+
def test_slice_ellipsis(self):
url = DataUrl("/a.h5?path=/b&slice=...")
expected = [True, True, None, "/a.h5", "/b", (Ellipsis, )]
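These cases exercise the extended ``slice`` parsing added to `silx/io/url.py` (see that hunk further down): ``start:stop:step`` expressions in the query now map to Python `slice` objects. For illustration::

    from silx.io.url import DataUrl

    url = DataUrl("/a.h5?path=/b&slice=2:5")
    assert url.data_slice() == (slice(2, 5),)

    url = DataUrl("/a.h5?path=/b&slice=::2")
    assert url.data_slice() == (slice(None, None, 2),)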
diff --git a/silx/io/test/test_utils.py b/silx/io/test/test_utils.py
index 6c70636..13ab532 100644
--- a/silx/io/test/test_utils.py
+++ b/silx/io/test/test_utils.py
@@ -33,6 +33,7 @@ import unittest
import sys
from .. import utils
+from ..._version import calc_hexversion
import silx.io.url
import h5py
@@ -40,11 +41,9 @@ from ..utils import h5ls
import fabio
-
__authors__ = ["P. Knobel"]
__license__ = "MIT"
-__date__ = "12/02/2018"
-
+__date__ = "03/12/2020"
expected_spec1 = r"""#F .*
#D .*
@@ -67,6 +66,28 @@ expected_spec2 = expected_spec1 + r"""
2 8\.00
3 9\.00
"""
+
+expected_spec2reg = r"""#F .*
+#D .*
+
+#S 1 Ordinate1
+#D .*
+#N 3
+#L Abscissa Ordinate1 Ordinate2
+1 4\.00 7\.00
+2 5\.00 8\.00
+3 6\.00 9\.00
+"""
+
+expected_spec2irr = expected_spec1 + r"""
+#S 2 Ordinate2
+#D .*
+#N 2
+#L Abscissa Ordinate2
+1 7\.00
+2 8\.00
+"""
+
expected_csv = r"""Abscissa;Ordinate1;Ordinate2
1;4\.00;7\.00e\+00
2;5\.00;8\.00e\+00
@@ -83,6 +104,7 @@ expected_csv2 = r"""x;y0;y1
class TestSave(unittest.TestCase):
"""Test saving curves as SpecFile:
"""
+
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.spec_fname = os.path.join(self.tempdir, "savespec.dat")
@@ -92,6 +114,7 @@ class TestSave(unittest.TestCase):
self.x = [1, 2, 3]
self.xlab = "Abscissa"
self.y = [[4, 5, 6], [7, 8, 9]]
+ self.y_irr = [[4, 5, 6], [7, 8]]
self.ylabs = ["Ordinate1", "Ordinate2"]
def tearDown(self):
@@ -103,13 +126,6 @@ class TestSave(unittest.TestCase):
os.unlink(self.npy_fname)
shutil.rmtree(self.tempdir)
- def assertRegex(self, *args, **kwargs):
- # Python 2 compatibility
- if sys.version_info.major >= 3:
- return super(TestSave, self).assertRegex(*args, **kwargs)
- else:
- return self.assertRegexpMatches(*args, **kwargs)
-
def test_save_csv(self):
utils.save1D(self.csv_fname, self.x, self.y,
xlabel=self.xlab, ylabels=self.ylabs,
@@ -145,7 +161,6 @@ class TestSave(unittest.TestCase):
specf = open(self.spec_fname)
actual_spec = specf.read()
specf.close()
-
self.assertRegex(actual_spec, expected_spec1)
def test_savespec_file_handle(self):
@@ -165,18 +180,30 @@ class TestSave(unittest.TestCase):
specf = open(self.spec_fname)
actual_spec = specf.read()
specf.close()
-
self.assertRegex(actual_spec, expected_spec2)
- def test_save_spec(self):
- """Save SpecFile using save()"""
+ def test_save_spec_reg(self):
+ """Save SpecFile using save() on a regular pattern"""
utils.save1D(self.spec_fname, self.x, self.y, xlabel=self.xlab,
ylabels=self.ylabs, filetype="spec", fmt=["%d", "%.2f"])
specf = open(self.spec_fname)
actual_spec = specf.read()
specf.close()
- self.assertRegex(actual_spec, expected_spec2)
+
+ self.assertRegex(actual_spec, expected_spec2reg)
+
+ def test_save_spec_irr(self):
+ """Save SpecFile using save() on an irregular pattern"""
+ # invalid test case ?! disabled pending clarification
+ self.skipTest("invalid test case ?!")
+ utils.save1D(self.spec_fname, self.x, self.y_irr, xlabel=self.xlab,
+ ylabels=self.ylabs, filetype="spec", fmt=["%d", "%.2f"])
+
+ specf = open(self.spec_fname)
+ actual_spec = specf.read()
+ specf.close()
+ self.assertRegex(actual_spec, expected_spec2irr)
def test_save_csv_no_labels(self):
"""Save csv using save(), with autoheader=True but
@@ -217,6 +244,7 @@ class TestH5Ls(unittest.TestCase):
<HDF5 dataset "data": shape (1,), type "<f8">
"""
+
def assertMatchAnyStringInList(self, pattern, list_of_strings):
for string_ in list_of_strings:
if re.match(pattern, string_):
@@ -395,6 +423,7 @@ class TestOpen(unittest.TestCase):
class TestNodes(unittest.TestCase):
"""Test `silx.io.utils.is_` functions."""
+
def test_real_h5py_objects(self):
name = tempfile.mktemp(suffix=".h5")
try:
@@ -417,45 +446,60 @@ class TestNodes(unittest.TestCase):
os.unlink(name)
def test_h5py_like_file(self):
+
class Foo(object):
+
def __init__(self):
self.h5_class = utils.H5Type.FILE
+
obj = Foo()
self.assertTrue(utils.is_file(obj))
self.assertTrue(utils.is_group(obj))
self.assertFalse(utils.is_dataset(obj))
def test_h5py_like_group(self):
+
class Foo(object):
+
def __init__(self):
self.h5_class = utils.H5Type.GROUP
+
obj = Foo()
self.assertFalse(utils.is_file(obj))
self.assertTrue(utils.is_group(obj))
self.assertFalse(utils.is_dataset(obj))
def test_h5py_like_dataset(self):
+
class Foo(object):
+
def __init__(self):
self.h5_class = utils.H5Type.DATASET
+
obj = Foo()
self.assertFalse(utils.is_file(obj))
self.assertFalse(utils.is_group(obj))
self.assertTrue(utils.is_dataset(obj))
def test_bad(self):
+
class Foo(object):
+
def __init__(self):
pass
+
obj = Foo()
self.assertFalse(utils.is_file(obj))
self.assertFalse(utils.is_group(obj))
self.assertFalse(utils.is_dataset(obj))
def test_bad_api(self):
+
class Foo(object):
+
def __init__(self):
self.h5_class = int
+
obj = Foo()
self.assertFalse(utils.is_file(obj))
self.assertFalse(utils.is_group(obj))
@@ -513,18 +557,20 @@ class TestGetData(unittest.TestCase):
def test_hdf5_array(self):
url = "silx:%s?/group/group/array" % self.h5_filename
data = utils.get_data(url=url)
- self.assertEqual(data.shape, (5, ))
+ self.assertEqual(data.shape, (5,))
self.assertEqual(data[0], 1)
def test_hdf5_array_slice(self):
url = "silx:%s?path=/group/group/array2d&slice=1" % self.h5_filename
data = utils.get_data(url=url)
- self.assertEqual(data.shape, (5, ))
+ self.assertEqual(data.shape, (5,))
self.assertEqual(data[0], 6)
def test_hdf5_array_slice_out_of_range(self):
url = "silx:%s?path=/group/group/array2d&slice=5" % self.h5_filename
- self.assertRaises(ValueError, utils.get_data, url)
+ # ValueError: h5py 2.x
+ # IndexError: h5py 3.x
+ self.assertRaises((ValueError, IndexError), utils.get_data, url)
def test_edf_using_silx(self):
url = "silx:%s?/scan_0/instrument/detector_0/data" % self.edf_filename
@@ -568,14 +614,15 @@ class TestGetData(unittest.TestCase):
def _h5_py_version_older_than(version):
- v_majeur, v_mineur, v_micro = h5py.version.version.split('.')[:3]
- r_majeur, r_mineur, r_micro = version.split('.')
- return v_majeur >= r_majeur and v_mineur >= r_mineur
+ v_majeur, v_mineur, v_micro = [int(i) for i in h5py.version.version.split('.')[:3]]
+ r_majeur, r_mineur, r_micro = [int(i) for i in version.split('.')]
+ return calc_hexversion(v_majeur, v_mineur, v_micro) >= calc_hexversion(r_majeur, r_mineur, r_micro)
@unittest.skipUnless(_h5_py_version_older_than('2.9.0'), 'h5py version < 2.9.0')
class TestRawFileToH5(unittest.TestCase):
"""Test conversion of .vol file to .h5 external dataset"""
+
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self._vol_file = os.path.join(self.tempdir, 'test_vol.vol')
@@ -589,7 +636,7 @@ class TestRawFileToH5(unittest.TestCase):
assert os.path.exists(self._vol_file + '.npy')
os.rename(self._vol_file + '.npy', self._vol_file)
self.h5_file = os.path.join(self.tempdir, 'test_h5.h5')
- self.external_dataset_path= '/root/my_external_dataset'
+ self.external_dataset_path = '/root/my_external_dataset'
self._data_url = silx.io.url.DataUrl(file_path=self.h5_file,
data_path=self.external_dataset_path)
with open(self._file_info, 'w') as _fi:
@@ -672,6 +719,158 @@ class TestRawFileToH5(unittest.TestCase):
shape=self._dataset_shape))
+class TestH5Strings(unittest.TestCase):
+ """Test HDF5 str and bytes writing and reading"""
+
+ @classmethod
+ def setUpClass(cls):
+ cls.tempdir = tempfile.mkdtemp()
+ cls.vlenstr = h5py.special_dtype(vlen=str)
+ cls.vlenbytes = h5py.special_dtype(vlen=bytes)
+ try:
+ cls.unicode = unicode
+ except NameError:
+ cls.unicode = str
+
+ @classmethod
+ def tearDownClass(cls):
+ shutil.rmtree(cls.tempdir)
+
+ def setUp(self):
+ self.file = h5py.File(os.path.join(self.tempdir, 'file.h5'), mode="w")
+
+ def tearDown(self):
+ self.file.close()
+
+ @classmethod
+ def _make_array(cls, value, n):
+ if isinstance(value, bytes):
+ dtype = cls.vlenbytes
+ elif isinstance(value, cls.unicode):
+ dtype = cls.vlenstr
+ else:
+ return numpy.array([value] * n)
+ return numpy.array([value] * n, dtype=dtype)
+
+ @classmethod
+ def _get_charset(cls, value):
+ if isinstance(value, bytes):
+ return h5py.h5t.CSET_ASCII
+ elif isinstance(value, cls.unicode):
+ return h5py.h5t.CSET_UTF8
+ else:
+ return None
+
+ def _check_dataset(self, value, result=None):
+ # Write+read scalar
+ if result:
+ decode_ascii = True
+ else:
+ decode_ascii = False
+ result = value
+ charset = self._get_charset(value)
+ self.file["data"] = value
+ data = utils.h5py_read_dataset(self.file["data"], decode_ascii=decode_ascii)
+ assert type(data) == type(result), data
+ assert data == result, data
+ if charset:
+ assert self.file["data"].id.get_type().get_cset() == charset
+
+ # Write+read variable length
+ self.file["vlen_data"] = self._make_array(value, 2)
+ data = utils.h5py_read_dataset(self.file["vlen_data"], decode_ascii=decode_ascii, index=0)
+ assert type(data) == type(result), data
+ assert data == result, data
+ data = utils.h5py_read_dataset(self.file["vlen_data"], decode_ascii=decode_ascii)
+ numpy.testing.assert_array_equal(data, [result] * 2)
+ if charset:
+ assert self.file["vlen_data"].id.get_type().get_cset() == charset
+
+ def _check_attribute(self, value, result=None):
+ if result:
+ decode_ascii = True
+ else:
+ decode_ascii = False
+ result = value
+ self.file.attrs["data"] = value
+ data = utils.h5py_read_attribute(self.file.attrs, "data", decode_ascii=decode_ascii)
+ assert type(data) == type(result), data
+ assert data == result, data
+
+ self.file.attrs["vlen_data"] = self._make_array(value, 2)
+ data = utils.h5py_read_attribute(self.file.attrs, "vlen_data", decode_ascii=decode_ascii)
+ assert type(data[0]) == type(result), data[0]
+ assert data[0] == result, data[0]
+ numpy.testing.assert_array_equal(data, [result] * 2)
+
+ data = utils.h5py_read_attributes(self.file.attrs, decode_ascii=decode_ascii)["vlen_data"]
+ assert type(data[0]) == type(result), data[0]
+ assert data[0] == result, data[0]
+ numpy.testing.assert_array_equal(data, [result] * 2)
+
+ def test_dataset_ascii_bytes(self):
+ self._check_dataset(b"abc")
+
+ def test_attribute_ascii_bytes(self):
+ self._check_attribute(b"abc")
+
+ def test_dataset_ascii_bytes_decode(self):
+ self._check_dataset(b"abc", result="abc")
+
+ def test_attribute_ascii_bytes_decode(self):
+ self._check_attribute(b"abc", result="abc")
+
+ def test_dataset_ascii_str(self):
+ self._check_dataset("abc")
+
+ def test_attribute_ascii_str(self):
+ self._check_attribute("abc")
+
+ def test_dataset_utf8_str(self):
+ self._check_dataset("\u0101bc")
+
+ def test_attribute_utf8_str(self):
+ self._check_attribute("\u0101bc")
+
+ def test_dataset_utf8_bytes(self):
+ # 0xC481 is the byte representation of U+0101
+ self._check_dataset(b"\xc4\x81bc")
+
+ def test_attribute_utf8_bytes(self):
+ # 0xC481 is the byte representation of U+0101
+ self._check_attribute(b"\xc4\x81bc")
+
+ def test_dataset_utf8_bytes_decode(self):
+ # 0xC481 is the byte representation of U+0101
+ self._check_dataset(b"\xc4\x81bc", result="\u0101bc")
+
+ def test_attribute_utf8_bytes_decode(self):
+ # 0xC481 is the byte representation of U+0101
+ self._check_attribute(b"\xc4\x81bc", result="\u0101bc")
+
+ def test_dataset_latin1_bytes(self):
+ # extended ascii character 0xE4
+ self._check_dataset(b"\xe423")
+
+ def test_attribute_latin1_bytes(self):
+ # extended ascii character 0xE4
+ self._check_attribute(b"\xe423")
+
+ def test_dataset_latin1_bytes_decode(self):
+ # U+DCE4: surrogate for extended ascii character 0xE4
+ self._check_dataset(b"\xe423", result="\udce423")
+
+ def test_attribute_latin1_bytes_decode(self):
+ # U+DCE4: surrogate for extended ascii character 0xE4
+ self._check_attribute(b"\xe423", result="\udce423")
+
+ def test_dataset_no_string(self):
+ self._check_dataset(numpy.int64(10))
+
+ def test_attribute_no_string(self):
+ self._check_attribute(numpy.int64(10))
+
+
def suite():
loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
test_suite = unittest.TestSuite()
@@ -681,6 +880,7 @@ def suite():
test_suite.addTest(loadTests(TestNodes))
test_suite.addTest(loadTests(TestGetData))
test_suite.addTest(loadTests(TestRawFileToH5))
+ test_suite.addTest(loadTests(TestH5Strings))
return test_suite
diff --git a/silx/io/url.py b/silx/io/url.py
index 7607ae5..044977c 100644
--- a/silx/io/url.py
+++ b/silx/io/url.py
@@ -178,8 +178,20 @@ class DataUrl(object):
def str_to_slice(string):
if string == "...":
return Ellipsis
- elif string == ":":
- return slice(None)
+ elif ':' in string:
+ if string == ":":
+ return slice(None)
+ else:
+ def get_value(my_str):
+ if my_str in ('', None):
+ return None
+ else:
+ return int(my_str)
+ sss = string.split(':')
+ start = get_value(sss[0])
+ stop = get_value(sss[1] if len(sss) > 1 else None)
+ step = get_value(sss[2] if len(sss) > 2 else None)
+ return slice(start, stop, step)
else:
return int(string)
@@ -201,7 +213,10 @@ class DataUrl(object):
:param str path: Path representing the URL.
"""
self.__path = path
- path = path.replace("::", "?", 1)
+ # Only replace "::" with "?" when no "?" is present already;
+ # otherwise it can mangle a data_slice such as "::2"
+ if '?' not in path:
+ path = path.replace("::", "?", 1)
url = parse.urlparse(path)
is_valid = True
diff --git a/silx/io/utils.py b/silx/io/utils.py
index 5da344d..12e9a7e 100644
--- a/silx/io/utils.py
+++ b/silx/io/utils.py
@@ -25,8 +25,7 @@
__authors__ = ["P. Knobel", "V. Valls"]
__license__ = "MIT"
-__date__ = "18/04/2018"
-
+__date__ = "03/12/2020"
import enum
import os.path
@@ -40,18 +39,19 @@ import six
from silx.utils.proxy import Proxy
import silx.io.url
+from .._version import calc_hexversion
import h5py
+import h5py.h5t
+import h5py.h5a
try:
import h5pyd
except ImportError as e:
h5pyd = None
-
logger = logging.getLogger(__name__)
-
NEXUS_HDF5_EXT = [".h5", ".nx5", ".nxs", ".hdf", ".hdf5", ".cxi"]
"""List of possible extensions for HDF5 file formats."""
@@ -190,34 +190,46 @@ def save1D(fname, x, y, xlabel=None, ylabels=None, filetype=None,
if xlabel is None:
xlabel = "x"
if ylabels is None:
- if len(numpy.array(y).shape) > 1:
+ if numpy.array(y).ndim > 1:
ylabels = ["y%d" % i for i in range(len(y))]
else:
ylabels = ["y"]
elif isinstance(ylabels, (list, tuple)):
# if ylabels is provided as a list, every element must
# be a string
- ylabels = [ylabels[i] if ylabels[i] is not None else "y%d" % i
- for i in range(len(ylabels))]
+ ylabels = [ylabel if isinstance(ylabel, string_types) else "y%d" % i
+ for i, ylabel in enumerate(ylabels)]
if filetype.lower() == "spec":
- y_array = numpy.asarray(y)
-
- # make sure y_array is a 2D array even for a single curve
- if len(y_array.shape) == 1:
- y_array = y_array.reshape(1, y_array.shape[0])
- elif len(y_array.shape) > 2 or len(y_array.shape) < 1:
- raise IndexError("y must be a 1D or 2D array")
-
- # First curve
- specf = savespec(fname, x, y_array[0], xlabel, ylabels[0], fmt=fmt,
- scan_number=1, mode="w", write_file_header=True,
- close_file=False)
- # Other curves
- for i in range(1, y_array.shape[0]):
- specf = savespec(specf, x, y_array[i], xlabel, ylabels[i],
- fmt=fmt, scan_number=i + 1, mode="w",
- write_file_header=False, close_file=False)
+ # Check if we have regular data:
+ ref = len(x)
+ regular = True
+ for one_y in y:
+ regular &= len(one_y) == ref
+ if regular:
+ if isinstance(fmt, (list, tuple)) and len(fmt) < (len(ylabels) + 1):
+ fmt = fmt + [fmt[-1]] * (1 + len(ylabels) - len(fmt))
+ specf = savespec(fname, x, y, xlabel, ylabels, fmt=fmt,
+ scan_number=1, mode="w", write_file_header=True,
+ close_file=False)
+ else:
+ y_array = numpy.asarray(y)
+ # make sure y_array is a 2D array even for a single curve
+ if y_array.ndim == 1:
+ y_array.shape = 1, -1
+ elif y_array.ndim not in [1, 2]:
+ raise IndexError("y must be a 1D or 2D array")
+
+ # First curve
+ specf = savespec(fname, x, y_array[0], xlabel, ylabels[0], fmt=fmt,
+ scan_number=1, mode="w", write_file_header=True,
+ close_file=False)
+ # Other curves
+ for i in range(1, y_array.shape[0]):
+ specf = savespec(specf, x, y_array[i], xlabel, ylabels[i],
+ fmt=fmt, scan_number=i + 1, mode="w",
+ write_file_header=False, close_file=False)
+
# close file if we created it
if not hasattr(fname, "write"):
specf.close()
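With this change, regular data (every row of ``y`` as long as ``x``) is written as a single multi-column SPEC scan, as checked by ``expected_spec2reg`` in the tests above, while irregular data falls back to the previous one-scan-per-curve layout. A minimal sketch with an illustrative output file::

    from silx.io.utils import save1D

    x = [1, 2, 3]
    y = [[4, 5, 6], [7, 8, 9]]
    # Produces one scan with "#N 3" and "#L Abscissa Ordinate1 Ordinate2"
    save1D("savespec.dat", x, y, xlabel="Abscissa",
           ylabels=["Ordinate1", "Ordinate2"],
           filetype="spec", fmt=["%d", "%.2f"])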
@@ -307,9 +319,11 @@ def savespec(specfile, x, y, xlabel="X", ylabel="Y", fmt="%.7g",
or append mode. If a file name is provided, a new file is open in
write mode (existing file with the same name will be lost)
:param x: 1D-Array (or list) of abscissa values
- :param y: 1D-array (or list) of ordinates values
+ :param y: 1D-array (or list), or list of them of ordinates values.
+ All dataset must have the same length as x
:param xlabel: Abscissa label (default ``"X"``)
- :param ylabel: Ordinate label
+ :param ylabel: Ordinate label, may be a list of labels when multiple curves
+ are to be saved together.
:param fmt: Format string for data. You can specify a short format
string that defines a single format for both ``x`` and ``y`` values,
or a list of two different format strings (e.g. ``["%d", "%.7g"]``).
@@ -333,40 +347,51 @@ def savespec(specfile, x, y, xlabel="X", ylabel="Y", fmt="%.7g",
x_array = numpy.asarray(x)
y_array = numpy.asarray(y)
+ if y_array.ndim > 2:
+ raise IndexError("Y columns must have be packed as 1D")
- if y_array.shape[0] != x_array.shape[0]:
+ if y_array.shape[-1] != x_array.shape[0]:
raise IndexError("X and Y columns must have the same length")
+ if y_array.ndim == 2:
+ assert isinstance(ylabel, (list, tuple))
+ assert y_array.shape[0] == len(ylabel)
+ labels = (xlabel, *ylabel)
+ else:
+ labels = (xlabel, ylabel)
+ data = numpy.vstack((x_array, y_array))
+ ncol = data.shape[0]
+ assert len(labels) == ncol
+
if isinstance(fmt, string_types) and fmt.count("%") == 1:
- full_fmt_string = fmt + " " + fmt + "\n"
- elif isinstance(fmt, (list, tuple)) and len(fmt) == 2:
- full_fmt_string = " ".join(fmt) + "\n"
+ full_fmt_string = " ".join([fmt] * ncol)
+ elif isinstance(fmt, (list, tuple)) and len(fmt) == ncol:
+ full_fmt_string = " ".join(fmt)
else:
- raise ValueError("fmt must be a single format string or a list of " +
- "two format strings")
+ raise ValueError("`fmt` must be a single format string or a list of " +
+ "format strings with as many format as ncolumns")
if not hasattr(specfile, "write"):
f = builtin_open(specfile, mode)
else:
f = specfile
- output = ""
-
- current_date = "#D %s\n" % (time.ctime(time.time()))
-
+ current_date = "#D %s" % (time.ctime(time.time()))
if write_file_header:
- output += "#F %s\n" % f.name
- output += current_date
- output += "\n"
-
- output += "#S %d %s\n" % (scan_number, ylabel)
- output += current_date
- output += "#N 2\n"
- output += "#L %s %s\n" % (xlabel, ylabel)
- for i in range(y_array.shape[0]):
- output += full_fmt_string % (x_array[i], y_array[i])
- output += "\n"
+ lines = [ "#F %s" % f.name, current_date, ""]
+ else:
+ lines = [""]
+ lines += [ "#S %d %s" % (scan_number, labels[1]),
+ current_date,
+ "#N %d" % ncol,
+ "#L " + " ".join(labels)]
+
+ for i in data.T:
+ lines.append(full_fmt_string % tuple(i))
+ lines.append("")
+ output = "\n".join(lines)
f.write(output.encode())
if close_file:
@@ -406,7 +431,7 @@ def h5ls(h5group, lvl=0):
if is_group(h5group):
h5f = h5group
elif isinstance(h5group, string_types):
- h5f = open(h5group) # silx.io.open
+ h5f = open(h5group) # silx.io.open
else:
raise TypeError("h5group must be a hdf5-like group object or a file name.")
@@ -735,6 +760,26 @@ def is_softlink(obj):
return t == H5Type.SOFT_LINK
+def is_externallink(obj):
+ """
+ True if the object is a h5py.ExternalLink-like object.
+
+ :param obj: An object
+ """
+ t = get_h5_class(obj)
+ return t == H5Type.EXTERNAL_LINK
+
+
+def is_link(obj):
+ """
+ True if the object is a h5py link-like object.
+
+ :param obj: An object
+ """
+ t = get_h5_class(obj)
+ return t in {H5Type.SOFT_LINK, H5Type.EXTERNAL_LINK}
+
+
def get_data(url):
"""Returns a numpy data from an URL.
@@ -791,16 +836,16 @@ def get_data(url):
raise ValueError("Data path from URL '%s' is not a dataset" % url.path())
if data_slice is not None:
- data = data[data_slice]
+ data = h5py_read_dataset(data, index=data_slice)
else:
# works for scalar and array
- data = data[()]
+ data = h5py_read_dataset(data)
elif url.scheme() == "fabio":
import fabio
data_slice = url.data_slice()
if data_slice is None:
- data_slice = (0, )
+ data_slice = (0,)
if data_slice is None or len(data_slice) != 1:
raise ValueError("Fabio slice expect a single frame, but %s found" % data_slice)
index = data_slice[0]
@@ -844,8 +889,8 @@ def rawfile_to_h5_external_dataset(bin_file, output_url, shape, dtype,
"""
assert isinstance(output_url, silx.io.url.DataUrl)
assert isinstance(shape, (tuple, list))
- v_majeur, v_mineur, v_micro = h5py.version.version.split('.')
- if v_majeur <= '2' and v_mineur < '9':
+ v_majeur, v_mineur, v_micro = [int(i) for i in h5py.version.version.split('.')[:3]]
+ if calc_hexversion(v_majeur, v_mineur, v_micro) < calc_hexversion(2, 9, 0):
raise Exception('h5py >= 2.9 should be installed to access the '
'external feature.')
@@ -915,3 +960,183 @@ def vol_to_h5_external_dataset(vol_file, output_url, info_file=None,
shape=shape,
dtype=vol_dtype,
overwrite=overwrite)
+
+
+def h5py_decode_value(value, encoding="utf-8", errors="surrogateescape"):
+ """Keep bytes when value cannot be decoded
+
+ :param value: bytes or array of bytes
+ :param str encoding:
+ :param str errors:
+ """
+ try:
+ if numpy.isscalar(value):
+ return value.decode(encoding, errors=errors)
+ str_item = [b.decode(encoding, errors=errors) for b in value.flat]
+ return numpy.array(str_item, dtype=object).reshape(value.shape)
+ except UnicodeDecodeError:
+ return value
+
+
+def h5py_encode_value(value, encoding="utf-8", errors="surrogateescape"):
+ """Keep string when value cannot be encoding
+
+ :param value: string or array of strings
+ :param str encoding:
+ :param str errors:
+ """
+ try:
+ if numpy.isscalar(value):
+ return value.encode(encoding, errors=errors)
+ bytes_item = [s.encode(encoding, errors=errors) for s in value.flat]
+ return numpy.array(bytes_item, dtype=object).reshape(value.shape)
+ except UnicodeEncodeError:
+ return value
+
+
+class H5pyDatasetReadWrapper:
+ """Wrapper to handle H5T_STRING decoding on-the-fly when reading
+ a dataset. Uniform behaviour for h5py 2.x and h5py 3.x
+
+ h5py abuses H5T_STRING with ASCII character set
+ to store `bytes`: dset[()] = b"..."
+ Therefore an H5T_STRING with ASCII encoding is not decoded by default.
+ """
+
+ H5PY_AUTODECODE_NONASCII = int(h5py.version.version.split(".")[0]) < 3
+
+ def __init__(self, dset, decode_ascii=False):
+ """
+ :param h5py.Dataset dset:
+ :param bool decode_ascii:
+ """
+ try:
+ string_info = h5py.h5t.check_string_dtype(dset.dtype)
+ except AttributeError:
+ # h5py < 2.10
+ try:
+ idx = dset.id.get_type().get_cset()
+ except AttributeError:
+ # Not an H5T_STRING
+ encoding = None
+ else:
+ encoding = ["ascii", "utf-8"][idx]
+ else:
+ # h5py >= 2.10
+ try:
+ encoding = string_info.encoding
+ except AttributeError:
+ # Not an H5T_STRING
+ encoding = None
+ if encoding == "ascii" and not decode_ascii:
+ encoding = None
+ if encoding != "ascii" and self.H5PY_AUTODECODE_NONASCII:
+ # Decoding is already done by the h5py library
+ encoding = None
+ if encoding == "ascii":
+ # ASCII can be decoded as UTF-8
+ encoding = "utf-8"
+ self._encoding = encoding
+ self._dset = dset
+
+ def __getitem__(self, args):
+ value = self._dset[args]
+ if self._encoding:
+ return h5py_decode_value(value, encoding=self._encoding)
+ else:
+ return value
+
+
+class H5pyAttributesReadWrapper:
+ """Wrapper to handle H5T_STRING decoding on-the-fly when reading
+ an attribute. Uniform behaviour for h5py 2.x and h5py 3.x
+
+ h5py abuses H5T_STRING with ASCII character set
+ to store `bytes`: attrs[name] = b"..."
+ Therefore an H5T_STRING with ASCII encoding is not decoded by default.
+ """
+
+ H5PY_AUTODECODE = int(h5py.version.version.split(".")[0]) >= 3
+
+ def __init__(self, attrs, decode_ascii=False):
+ """
+ :param h5py.AttributeManager attrs:
+ :param bool decode_ascii:
+ """
+ self._attrs = attrs
+ self._decode_ascii = decode_ascii
+
+ def __getitem__(self, args):
+ value = self._attrs[args]
+
+ # Get the string encoding (if a string)
+ try:
+ dtype = self._attrs.get_id(args).dtype
+ except AttributeError:
+ # h5py < 2.10
+ attr_id = h5py.h5a.open(self._attrs._id, self._attrs._e(args))
+ try:
+ idx = attr_id.get_type().get_cset()
+ except AttributeError:
+ # Not an H5T_STRING
+ return value
+ else:
+ encoding = ["ascii", "utf-8"][idx]
+ else:
+ # h5py >= 2.10
+ try:
+ encoding = h5py.h5t.check_string_dtype(dtype).encoding
+ except AttributeError:
+ # Not an H5T_STRING
+ return value
+
+ if self.H5PY_AUTODECODE:
+ if encoding == "ascii" and not self._decode_ascii:
+ # Undo decoding by the h5py library
+ return h5py_encode_value(value, encoding="utf-8")
+ else:
+ if encoding == "ascii" and self._decode_ascii:
+ # Decode ASCII as UTF-8 for consistency
+ return h5py_decode_value(value, encoding="utf-8")
+
+ # Decoding is already done by the h5py library
+ return value
+
+ def items(self):
+ for k in self._attrs.keys():
+ yield k, self[k]
+
+
+def h5py_read_dataset(dset, index=tuple(), decode_ascii=False):
+ """Read data from dataset object. UTF-8 strings will be
+ decoded while ASCII strings will only be decoded when
+ `decode_ascii=True`.
+
+ :param h5py.Dataset dset:
+ :param index: slicing (all by default)
+ :param bool decode_ascii:
+ """
+ return H5pyDatasetReadWrapper(dset, decode_ascii=decode_ascii)[index]
+
+
+def h5py_read_attribute(attrs, name, decode_ascii=False):
+ """Read data from attributes. UTF-8 strings will be
+ decoded while ASCII strings will only be decoded when
+ `decode_ascii=True`.
+
+ :param h5py.AttributeManager attrs:
+ :param str name: attribute name
+ :param bool decode_ascii:
+ """
+ return H5pyAttributesReadWrapper(attrs, decode_ascii=decode_ascii)[name]
+
+
+def h5py_read_attributes(attrs, decode_ascii=False):
+ """Read data from attributes. UTF-8 strings will be
+ decoded while ASCII strings will only be decoded when
+ `decode_ascii=True`.
+
+ :param h5py.AttributeManager attrs:
+ :param bool decode_ascii:
+ """
+ return dict(H5pyAttributesReadWrapper(attrs, decode_ascii=decode_ascii).items())
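
The read helpers above give uniform string semantics on h5py 2.x and 3.x: UTF-8 H5T_STRING values are decoded to `str`, while ASCII-flagged strings (which h5py uses to store `bytes`) are kept as `bytes` unless ``decode_ascii=True``. A minimal sketch based on the TestH5Strings cases, with an illustrative file name::

    import h5py
    from silx.io.utils import h5py_read_dataset, h5py_read_attribute

    with h5py.File("strings.h5", "w") as f:
        f["ascii"] = b"abc"      # H5T_STRING, ASCII character set
        f["utf8"] = "\u0101bc"   # H5T_STRING, UTF-8 character set
        f.attrs["name"] = b"abc"

        assert h5py_read_dataset(f["ascii"]) == b"abc"
        assert h5py_read_dataset(f["ascii"], decode_ascii=True) == "abc"
        assert h5py_read_dataset(f["utf8"]) == "\u0101bc"
        assert h5py_read_attribute(f.attrs, "name", decode_ascii=True) == "abc"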