Diffstat (limited to 'silx/io')
-rw-r--r--  silx/io/dictdump.py              303
-rw-r--r--  silx/io/h5py_utils.py            317
-rw-r--r--  silx/io/spech5.py                 30
-rw-r--r--  silx/io/test/__init__.py           2
-rw-r--r--  silx/io/test/test_dictdump.py    411
-rw-r--r--  silx/io/test/test_h5py_utils.py  397
-rw-r--r--  silx/io/url.py                     2
7 files changed, 1367 insertions, 95 deletions
diff --git a/silx/io/dictdump.py b/silx/io/dictdump.py
index bbb244a..e907668 100644
--- a/silx/io/dictdump.py
+++ b/silx/io/dictdump.py
@@ -26,6 +26,7 @@ by text strings to following file formats: `HDF5, INI, JSON`
"""
from collections import OrderedDict
+from collections.abc import Mapping
import json
import logging
import numpy
@@ -34,11 +35,16 @@ import sys
import h5py
from .configdict import ConfigDict
-from .utils import is_group, is_link, is_softlink, is_externallink
+from .utils import is_group
+from .utils import is_dataset
+from .utils import is_link
+from .utils import is_softlink
+from .utils import is_externallink
from .utils import is_file as is_h5_file_like
from .utils import open as h5open
from .utils import h5py_read_dataset
from .utils import H5pyAttributesReadWrapper
+from silx.utils.deprecation import deprecated_warning
__authors__ = ["P. Knobel"]
__license__ = "MIT"
@@ -66,7 +72,7 @@ def _prepare_hdf5_write_value(array_like):
return array
-class _SafeH5FileWrite(object):
+class _SafeH5FileWrite:
"""Context manager returning a :class:`h5py.File` object.
If this object is initialized with a file path, we open the file
@@ -82,7 +88,6 @@ class _SafeH5FileWrite(object):
"""
def __init__(self, h5file, mode="w"):
"""
-
:param h5file: HDF5 file path or :class:`h5py.File` instance
:param str mode: Can be ``"r+"`` (read/write, file must exist),
``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
@@ -106,7 +111,7 @@ class _SafeH5FileWrite(object):
self.h5file.close()
-class _SafeH5FileRead(object):
+class _SafeH5FileRead:
"""Context manager returning a :class:`h5py.File` or a
:class:`silx.io.spech5.SpecH5` or a :class:`silx.io.fabioh5.File` object.
@@ -136,18 +141,48 @@ class _SafeH5FileRead(object):
self.h5file.close()
+def _normalize_h5_path(h5root, h5path):
+ """
+ :param h5root: File name or h5py-like File, Group or Dataset
+ :param str h5path: relative to ``h5root``
+    :returns: 2-tuple (file name or file object, h5path)
+ """
+ if is_group(h5root):
+ group_name = h5root.name
+ if group_name == "/":
+ pass
+ elif h5path:
+ h5path = group_name + "/" + h5path
+ else:
+ h5path = group_name
+ h5file = h5root.file
+ elif is_dataset(h5root):
+ h5path = h5root.name
+ h5file = h5root.file
+ else:
+ h5file = h5root
+ if not h5path:
+ h5path = "/"
+ elif not h5path.endswith("/"):
+ h5path += "/"
+ return h5file, h5path
+
+
def dicttoh5(treedict, h5file, h5path='/',
- mode="w", overwrite_data=False,
- create_dataset_args=None):
+ mode="w", overwrite_data=None,
+ create_dataset_args=None, update_mode=None):
"""Write a nested dictionary to a HDF5 file, using keys as member names.
If a dictionary value is a sub-dictionary, a group is created. If it is
any other data type, it is cast into a numpy array and written as a
:mod:`h5py` dataset. Dictionary keys must be strings and cannot contain
the ``/`` character.
-
+
If dictionary keys are tuples they are interpreted to set h5 attributes.
- The tuples should have the format (dataset_name,attr_name)
+ The tuples should have the format (dataset_name, attr_name).
+
+ Existing HDF5 items can be deleted by providing the dictionary value
+ ``None``, provided that ``update_mode in ["modify", "replace"]``.
.. note::
@@ -158,21 +193,29 @@ def dicttoh5(treedict, h5file, h5path='/',
to define sub trees. If tuples are used as keys they should have the
    format (dataset_name, attr_name) and will add an h5 attribute with the
corresponding value.
- :param h5file: HDF5 file name or handle. If a file name is provided, the
- function opens the file in the specified mode and closes it again
- before completing.
- :param h5path: Target path in HDF5 file in which scan groups are created.
+ :param h5file: File name or h5py-like File, Group or Dataset
+ :param h5path: Target path in the HDF5 file relative to ``h5file``.
Default is root (``"/"``)
:param mode: Can be ``"r+"`` (read/write, file must exist),
``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
exists) or ``"a"`` (read/write if exists, create otherwise).
This parameter is ignored if ``h5file`` is a file handle.
- :param overwrite_data: If ``True``, existing groups and datasets can be
- overwritten, if ``False`` they are skipped. This parameter is only
- relevant if ``h5file_mode`` is ``"r+"`` or ``"a"``.
+ :param overwrite_data: Deprecated. ``True`` is approximately equivalent
+ to ``update_mode="modify"`` and ``False`` is equivalent to
+ ``update_mode="add"``.
:param create_dataset_args: Dictionary of args you want to pass to
``h5f.create_dataset``. This allows you to specify filters and
compression parameters. Don't specify ``name`` and ``data``.
+ :param update_mode: Can be ``add`` (default), ``modify`` or ``replace``.
+
+ * ``add``: Extend the existing HDF5 tree when possible. Existing HDF5
+ items (groups, datasets and attributes) remain untouched.
+ * ``modify``: Extend the existing HDF5 tree when possible, modify
+ existing attributes, modify same-sized dataset values and delete
+ HDF5 items with a ``None`` value in the dict tree.
+ * ``replace``: Replace the existing HDF5 tree. Items from the root of
+ the HDF5 tree that are not present in the root of the dict tree
+ will remain untouched.
Example::
@@ -201,44 +244,110 @@ def dicttoh5(treedict, h5file, h5path='/',
create_dataset_args=create_ds_args)
"""
- if not h5path.endswith("/"):
- h5path += "/"
+ if overwrite_data is not None:
+ reason = (
+ "`overwrite_data=True` becomes `update_mode='modify'` and "
+ "`overwrite_data=False` becomes `update_mode='add'`"
+ )
+ deprecated_warning(
+ type_="argument",
+ name="overwrite_data",
+ reason=reason,
+ replacement="update_mode",
+ since_version="0.15",
+ )
+
+ if update_mode is None:
+ if overwrite_data:
+ update_mode = "modify"
+ else:
+ update_mode = "add"
+ else:
+ valid_existing_values = ("add", "replace", "modify")
+ if update_mode not in valid_existing_values:
+ raise ValueError((
+ "Argument 'update_mode' can only have values: {}"
+ "".format(valid_existing_values)
+ ))
+ if overwrite_data is not None:
+ logger.warning("The argument `overwrite_data` is ignored")
- with _SafeH5FileWrite(h5file, mode=mode) as h5f:
- if isinstance(treedict, dict) and h5path != "/":
- if h5path not in h5f:
- h5f.create_group(h5path)
+ if not isinstance(treedict, Mapping):
+ raise TypeError("'treedict' must be a dictionary")
- for key in filter(lambda k: not isinstance(k, tuple), treedict):
- key_is_group = isinstance(treedict[key], dict)
- h5name = h5path + key
+ h5file, h5path = _normalize_h5_path(h5file, h5path)
- if key_is_group and treedict[key]:
- # non-empty group: recurse
- dicttoh5(treedict[key], h5f, h5name,
- overwrite_data=overwrite_data,
- create_dataset_args=create_dataset_args)
- continue
+ def _iter_treedict(attributes=False):
+ nonlocal treedict
+ for key, value in treedict.items():
+ if isinstance(key, tuple) == attributes:
+ yield key, value
- if h5name in h5f:
- # key already exists: delete or skip
- if overwrite_data is True:
- del h5f[h5name]
+ change_allowed = update_mode in ("replace", "modify")
+
+ with _SafeH5FileWrite(h5file, mode=mode) as h5f:
+ # Create the root of the tree
+ if h5path in h5f:
+ if not is_group(h5f[h5path]):
+ if update_mode == "replace":
+ del h5f[h5path]
+ h5f.create_group(h5path)
else:
- logger.warning('key (%s) already exists. '
- 'Not overwriting.' % (h5name))
- continue
+ return
+ else:
+ h5f.create_group(h5path)
- value = treedict[key]
+ # Loop over all groups, links and datasets
+ for key, value in _iter_treedict(attributes=False):
+ h5name = h5path + key
+ exists = h5name in h5f
- if value is None or key_is_group:
- # Create empty group
- h5f.create_group(h5name)
+ if value is None:
+ # Delete HDF5 item
+ if exists and change_allowed:
+ del h5f[h5name]
+ exists = False
+ elif isinstance(value, Mapping):
+ # HDF5 group
+ if exists and update_mode == "replace":
+ del h5f[h5name]
+ exists = False
+ if value:
+ dicttoh5(value, h5f, h5name,
+ update_mode=update_mode,
+ create_dataset_args=create_dataset_args)
+ elif not exists:
+ h5f.create_group(h5name)
elif is_link(value):
- h5f[h5name] = value
+ # HDF5 link
+ if exists and update_mode == "replace":
+ del h5f[h5name]
+ exists = False
+ if not exists:
+ # Create link from h5py link object
+ h5f[h5name] = value
else:
+ # HDF5 dataset
+ if exists and not change_allowed:
+ continue
data = _prepare_hdf5_write_value(value)
- # can't apply filters on scalars (datasets with shape == () )
+
+ # Edit the existing dataset
+ attrs_backup = None
+ if exists:
+ try:
+ h5f[h5name][()] = data
+ continue
+ except Exception:
+ # Delete the existing dataset
+ if update_mode != "replace":
+ if not is_dataset(h5f[h5name]):
+ continue
+ attrs_backup = dict(h5f[h5name].attrs)
+ del h5f[h5name]
+
+ # Create dataset
+ # can't apply filters on scalars (datasets with shape == ())
if data.shape == () or create_dataset_args is None:
h5f.create_dataset(h5name,
data=data)
@@ -246,36 +355,58 @@ def dicttoh5(treedict, h5file, h5path='/',
h5f.create_dataset(h5name,
data=data,
**create_dataset_args)
+ if attrs_backup:
+ h5f[h5name].attrs.update(attrs_backup)
- # deal with h5 attributes which have tuples as keys in treedict
- for key in filter(lambda k: isinstance(k, tuple), treedict):
- assert len(key) == 2, "attribute must be defined by 2 values"
+ # Loop over all attributes
+ for key, value in _iter_treedict(attributes=True):
+ if len(key) != 2:
+ raise ValueError("HDF5 attribute must be described by 2 values")
h5name = h5path + key[0]
attr_name = key[1]
if h5name not in h5f:
- # Create empty group if key for attr does not exist
+ # Create an empty group to store the attribute
h5f.create_group(h5name)
- logger.warning(
- "key (%s) does not exist. attr %s "
- "will be written to ." % (h5name, attr_name)
- )
-
- if attr_name in h5f[h5name].attrs:
- if not overwrite_data:
- logger.warning(
- "attribute %s@%s already exists. Not overwriting."
- "" % (h5name, attr_name)
- )
+
+ h5a = h5f[h5name].attrs
+ exists = attr_name in h5a
+
+ if value is None:
+ # Delete HDF5 attribute
+ if exists and change_allowed:
+ del h5a[attr_name]
+ exists = False
+ else:
+ # Add/modify HDF5 attribute
+ if exists and not change_allowed:
continue
+ data = _prepare_hdf5_write_value(value)
+ h5a[attr_name] = data
- # Write attribute
- value = treedict[key]
- data = _prepare_hdf5_write_value(value)
- h5f[h5name].attrs[attr_name] = data
+
+def _has_nx_class(treedict, key=""):
+ return key + "@NX_class" in treedict or \
+ (key, "NX_class") in treedict
+
+
+def _ensure_nx_class(treedict, parents=tuple()):
+ """Each group needs an "NX_class" attribute.
+ """
+ if _has_nx_class(treedict):
+ return
+ nparents = len(parents)
+ if nparents == 0:
+ treedict[("", "NX_class")] = "NXroot"
+ elif nparents == 1:
+ treedict[("", "NX_class")] = "NXentry"
+ else:
+ treedict[("", "NX_class")] = "NXcollection"
-def nexus_to_h5_dict(treedict, parents=tuple()):
+def nexus_to_h5_dict(
+ treedict, parents=tuple(), add_nx_class=True, has_nx_class=False
+):
"""The following conversions are applied:
* key with "{name}@{attr_name}" notation: key converted to 2-tuple
* key with ">{url}" notation: strip ">" and convert value to
@@ -286,14 +417,20 @@ def nexus_to_h5_dict(treedict, parents=tuple()):
to define sub tree. The ``"@"`` character is used to write attributes.
The ``">"`` prefix is used to define links.
:param parents: Needed to resolve up-links (tuple of HDF5 group names)
+ :param add_nx_class: Add "NX_class" attribute when missing
+ :param has_nx_class: The "NX_class" attribute is defined in the parent
    :rtype: dict
"""
+ if not isinstance(treedict, Mapping):
+ raise TypeError("'treedict' must be a dictionary")
copy = dict()
for key, value in treedict.items():
if "@" in key:
+ # HDF5 attribute
key = tuple(key.rsplit("@", 1))
elif key.startswith(">"):
+ # HDF5 link
if isinstance(value, str):
key = key[1:]
first, sep, second = value.partition("::")
@@ -314,10 +451,19 @@ def nexus_to_h5_dict(treedict, parents=tuple()):
value = h5py.SoftLink(first)
elif is_link(value):
key = key[1:]
- if isinstance(value, dict):
- copy[key] = nexus_to_h5_dict(value, parents=parents+(key,))
+ if isinstance(value, Mapping):
+ # HDF5 group
+ key_has_nx_class = add_nx_class and _has_nx_class(treedict, key)
+ copy[key] = nexus_to_h5_dict(
+ value,
+ parents=parents+(key,),
+ add_nx_class=add_nx_class,
+ has_nx_class=key_has_nx_class)
else:
+ # HDF5 dataset or link
copy[key] = value
+ if add_nx_class and not has_nx_class:
+ _ensure_nx_class(copy, parents)
return copy
@@ -336,7 +482,8 @@ def h5_to_nexus_dict(treedict):
copy = dict()
for key, value in treedict.items():
if isinstance(key, tuple):
- assert len(key)==2, "attribute must be defined by 2 values"
+ if len(key) != 2:
+ raise ValueError("HDF5 attribute must be described by 2 values")
key = "%s@%s" % (key[0], key[1])
elif is_softlink(value):
key = ">" + key
@@ -344,7 +491,7 @@ def h5_to_nexus_dict(treedict):
elif is_externallink(value):
key = ">" + key
value = value.filename + "::" + value.path
- if isinstance(value, dict):
+ if isinstance(value, Mapping):
copy[key] = h5_to_nexus_dict(value)
else:
copy[key] = value
@@ -414,10 +561,8 @@ def h5todict(h5file,
scalars). In some cases, you may find that a list of heterogeneous
data types is converted to a numpy array of strings.
- :param h5file: File name or :class:`h5py.File` object or spech5 file or
- fabioh5 file.
- :param str path: Name of HDF5 group to use as dictionary root level,
- to read only a sub-group in the file
+ :param h5file: File name or h5py-like File, Group or Dataset
+ :param str path: Target path in the HDF5 file relative to ``h5file``
:param List[str] exclude_names: Groups and datasets whose name contains
a string in this list will be ignored. Default is None (ignore nothing)
:param bool asarray: True (default) to read scalar as arrays, False to
@@ -431,6 +576,7 @@ def h5todict(h5file,
- 'ignore': Ignore errors
:return: Nested dictionary
"""
+ h5file, path = _normalize_h5_path(h5file, path)
with _SafeH5FileRead(h5file) as h5f:
ddict = {}
if path not in h5f:
@@ -508,7 +654,7 @@ def h5todict(h5file,
return ddict
-def dicttonx(treedict, h5file, h5path="/", **kw):
+def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw):
"""
Write a nested dictionary to a HDF5 file, using string keys as member names.
The NeXus convention is used to identify attributes with ``"@"`` character,
@@ -521,6 +667,8 @@ def dicttonx(treedict, h5file, h5path="/", **kw):
and array-like objects as leafs. The ``"/"`` character can be used
to define sub tree. The ``"@"`` character is used to write attributes.
The ``">"`` prefix is used to define links.
+ :param add_nx_class: Add "NX_class" attribute when missing. By default it
+ is ``True`` when ``update_mode`` is ``"add"`` or ``None``.
The named parameters are passed to dicttoh5.
@@ -557,12 +705,17 @@ def dicttonx(treedict, h5file, h5path="/", **kw):
dicttonx(gauss,"test.h5")
"""
+ h5file, h5path = _normalize_h5_path(h5file, h5path)
parents = tuple(p for p in h5path.split("/") if p)
- nxtreedict = nexus_to_h5_dict(treedict, parents=parents)
+ if add_nx_class is None:
+ add_nx_class = kw.get("update_mode", None) in (None, "add")
+ nxtreedict = nexus_to_h5_dict(
+ treedict, parents=parents, add_nx_class=add_nx_class
+ )
dicttoh5(nxtreedict, h5file, h5path=h5path, **kw)
-def nxtodict(h5file, **kw):
+def nxtodict(h5file, include_attributes=True, **kw):
"""Read a HDF5 file and return a nested dictionary with the complete file
structure and all data.
@@ -571,7 +724,7 @@ def nxtodict(h5file, **kw):
The named parameters are passed to h5todict.
"""
- nxtreedict = h5todict(h5file, **kw)
+ nxtreedict = h5todict(h5file, include_attributes=include_attributes, **kw)
return h5_to_nexus_dict(nxtreedict)
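The new ``update_mode`` argument of ``dicttoh5`` replaces the deprecated
``overwrite_data`` flag (``True`` maps to ``"modify"``, ``False`` to
``"add"``). A minimal usage sketch of the three modes, assuming a writable
``test.h5`` (file name, keys and values are illustrative only):

.. code-block:: python

    from silx.io.dictdump import dicttoh5

    # Create the initial tree
    dicttoh5({"group": {"dset": [1, 2]}}, "test.h5", mode="w")

    # "add" (default): existing items remain untouched
    dicttoh5({"group": {"dset": [9, 9]}}, "test.h5", mode="a",
             update_mode="add")

    # "modify": overwrite same-sized datasets and attributes;
    # a None value deletes the existing HDF5 item
    dicttoh5({"group": {"dset": [9, 9]}}, "test.h5", mode="a",
             update_mode="modify")
    dicttoh5({"group": {"dset": None}}, "test.h5", mode="a",
             update_mode="modify")

    # "replace": delete and recreate existing items
    dicttoh5({"group": {"dset": [1, 2, 3]}}, "test.h5", mode="a",
             update_mode="replace")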
diff --git a/silx/io/h5py_utils.py b/silx/io/h5py_utils.py
new file mode 100644
index 0000000..cbdb44a
--- /dev/null
+++ b/silx/io/h5py_utils.py
@@ -0,0 +1,317 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2016-2021 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""
+This module provides utility methods on top of h5py, mainly to handle
+parallel writing and reading.
+"""
+
+__authors__ = ["W. de Nolf"]
+__license__ = "MIT"
+__date__ = "27/01/2020"
+
+
+import os
+import traceback
+import h5py
+
+from .._version import calc_hexversion
+from ..utils import retry as retry_mod
+
+H5PY_HEX_VERSION = calc_hexversion(*h5py.version.version_tuple[:3])
+HDF5_HEX_VERSION = calc_hexversion(*h5py.version.hdf5_version_tuple[:3])
+
+HDF5_SWMR_VERSION = calc_hexversion(*h5py.get_config().swmr_min_hdf5_version[:3])
+HDF5_TRACK_ORDER_VERSION = calc_hexversion(2, 9, 0)
+
+HAS_SWMR = HDF5_HEX_VERSION >= HDF5_SWMR_VERSION
+HAS_TRACK_ORDER = H5PY_HEX_VERSION >= HDF5_TRACK_ORDER_VERSION
+
+
+def _is_h5py_exception(e):
+ for frame in traceback.walk_tb(e.__traceback__):
+ if frame[0].f_locals.get("__package__", None) == "h5py":
+ return True
+ return False
+
+
+def _retry_h5py_error(e):
+ """
+ :param BaseException e:
+ :returns bool:
+ """
+ if _is_h5py_exception(e):
+ if isinstance(e, (OSError, RuntimeError)):
+ return True
+ elif isinstance(e, KeyError):
+ # For example this needs to be retried:
+ # KeyError: 'Unable to open object (bad object header version number)'
+ return "Unable to open object" in str(e)
+ elif isinstance(e, retry_mod.RetryError):
+ return True
+ return False
+
+
+def retry(**kw):
+ """Decorator for a method that needs to be executed until it not longer
+ fails on HDF5 IO. Mainly used for reading an HDF5 file that is being
+ written.
+
+ :param \**kw: see `silx.utils.retry`
+ """
+ kw.setdefault("retry_on_error", _retry_h5py_error)
+ return retry_mod.retry(**kw)
+
+
+def retry_contextmanager(**kw):
+ """Decorator to make a context manager from a method that needs to be
+    entered until it no longer fails on HDF5 IO. Mainly used for reading
+ an HDF5 file that is being written.
+
+ :param \**kw: see `silx.utils.retry_contextmanager`
+ """
+ kw.setdefault("retry_on_error", _retry_h5py_error)
+ return retry_mod.retry_contextmanager(**kw)
+
+
+def retry_in_subprocess(**kw):
+ """Same as `retry` but it also retries segmentation faults.
+
+    On Windows you cannot use this decorator with the "@" syntax:
+
+ .. code-block:: python
+
+ def _method(*args, **kw):
+ ...
+
+ method = retry_in_subprocess()(_method)
+
+ :param \**kw: see `silx.utils.retry_in_subprocess`
+ """
+ kw.setdefault("retry_on_error", _retry_h5py_error)
+ return retry_mod.retry_in_subprocess(**kw)
+
+
+def group_has_end_time(h5item):
+ """Returns True when the HDF5 item is a Group with an "end_time"
+ dataset. A reader can use this as an indication that the Group
+ has been fully written (at least if the writer supports this).
+
+ :param Union[h5py.Group,h5py.Dataset] h5item:
+ :returns bool:
+ """
+ if isinstance(h5item, h5py.Group):
+ return "end_time" in h5item
+ else:
+ return False
+
+
+@retry_contextmanager()
+def open_item(filename, name, retry_invalid=False, validate=None):
+ """Yield an HDF5 dataset or group (retry until it can be instantiated).
+
+    :param str filename:
+    :param str name: name of the HDF5 item inside the file
+ :param bool retry_invalid: retry when item is missing or not valid
+ :param callable or None validate:
+ :yields Dataset, Group or None:
+ """
+ with File(filename) as h5file:
+ try:
+ item = h5file[name]
+ except KeyError as e:
+ if "doesn't exist" in str(e):
+ if retry_invalid:
+ raise retry_mod.RetryError
+ else:
+ item = None
+ else:
+ raise
+ if callable(validate) and item is not None:
+ if not validate(item):
+ if retry_invalid:
+ raise retry_mod.RetryError
+ else:
+ item = None
+ yield item
+
+
+def _top_level_names(filename, include_only=group_has_end_time):
+ """Return all valid top-level HDF5 names.
+
+ :param str filename:
+ :param callable or None include_only:
+ :returns list(str):
+ """
+ with File(filename) as h5file:
+ try:
+ if callable(include_only):
+ return [name for name in h5file["/"] if include_only(h5file[name])]
+ else:
+ return list(h5file["/"])
+ except KeyError:
+ raise retry_mod.RetryError
+
+
+top_level_names = retry()(_top_level_names)
+safe_top_level_names = retry_in_subprocess()(_top_level_names)
+
+
+class File(h5py.File):
+ """Takes care of HDF5 file locking and SWMR mode without the need
+    to handle those explicitly.
+
+    When using this class, you cannot open different files simultaneously
+ with different modes because the locking flag is an environment variable.
+ """
+
+ _HDF5_FILE_LOCKING = None
+ _NOPEN = 0
+ _SWMR_LIBVER = "latest"
+
+ def __init__(
+ self,
+ filename,
+ mode=None,
+ enable_file_locking=None,
+ swmr=None,
+ libver=None,
+ **kwargs
+ ):
+ """The arguments `enable_file_locking` and `swmr` should not be
+ specified explicitly for normal use cases.
+
+ :param str filename:
+ :param str or None mode: read-only by default
+ :param bool or None enable_file_locking: by default it is disabled for `mode='r'`
+ and `swmr=False` and enabled for all
+ other modes.
+ :param bool or None swmr: try both modes when `mode='r'` and `swmr=None`
+ :param **kwargs: see `h5py.File.__init__`
+ """
+ if mode is None:
+ mode = "r"
+ elif mode not in ("r", "w", "w-", "x", "a", "r+"):
+ raise ValueError("invalid mode {}".format(mode))
+ if not HAS_SWMR:
+ swmr = False
+
+ if enable_file_locking is None:
+ enable_file_locking = bool(mode != "r" or swmr)
+ if self._NOPEN:
+ self._check_locking_env(enable_file_locking)
+ else:
+ self._set_locking_env(enable_file_locking)
+
+ if swmr and libver is None:
+ libver = self._SWMR_LIBVER
+
+ if HAS_TRACK_ORDER:
+ kwargs.setdefault("track_order", True)
+ try:
+ super().__init__(filename, mode=mode, swmr=swmr, libver=libver, **kwargs)
+ except OSError as e:
+        #    wlock  wSWMR  rlock  rSWMR  OSError: Unable to open file (...)
+        # 1  TRUE   FALSE  FALSE  FALSE  -
+        # 2  TRUE   FALSE  FALSE  TRUE   -
+        # 3  TRUE   FALSE  TRUE   FALSE  unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'
+        # 4  TRUE   FALSE  TRUE   TRUE   unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'
+        # 5  TRUE   TRUE   FALSE  FALSE  file is already open for write (may use <h5clear file> to clear file consistency flags)
+        # 6  TRUE   TRUE   FALSE  TRUE   -
+        # 7  TRUE   TRUE   TRUE   FALSE  file is already open for write (may use <h5clear file> to clear file consistency flags)
+        # 8  TRUE   TRUE   TRUE   TRUE   -
+ if (
+ mode == "r"
+ and swmr is None
+ and "file is already open for write" in str(e)
+ ):
+ # Try reading in SWMR mode (situation 5 and 7)
+ swmr = True
+ if libver is None:
+ libver = self._SWMR_LIBVER
+ super().__init__(
+ filename, mode=mode, swmr=swmr, libver=libver, **kwargs
+ )
+ else:
+ raise
+ else:
+ self._add_nopen(1)
+ try:
+ if mode != "r" and swmr:
+ # Try setting writer in SWMR mode
+ self.swmr_mode = True
+ except Exception:
+ self.close()
+ raise
+
+ @classmethod
+ def _add_nopen(cls, v):
+ cls._NOPEN = max(cls._NOPEN + v, 0)
+
+ def close(self):
+ super().close()
+ self._add_nopen(-1)
+ if not self._NOPEN:
+ self._restore_locking_env()
+
+ def _set_locking_env(self, enable):
+ self._backup_locking_env()
+ if enable:
+ os.environ["HDF5_USE_FILE_LOCKING"] = "TRUE"
+ elif enable is None:
+ try:
+ del os.environ["HDF5_USE_FILE_LOCKING"]
+ except KeyError:
+ pass
+ else:
+ os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
+
+ def _get_locking_env(self):
+ v = os.environ.get("HDF5_USE_FILE_LOCKING")
+ if v == "TRUE":
+ return True
+ elif v is None:
+ return None
+ else:
+ return False
+
+ def _check_locking_env(self, enable):
+ if enable != self._get_locking_env():
+ if enable:
+ raise RuntimeError(
+ "Close all HDF5 files before enabling HDF5 file locking"
+ )
+ else:
+ raise RuntimeError(
+ "Close all HDF5 files before disabling HDF5 file locking"
+ )
+
+ def _backup_locking_env(self):
+ v = os.environ.get("HDF5_USE_FILE_LOCKING")
+ if v is None:
+ self._HDF5_FILE_LOCKING = None
+ else:
+ self._HDF5_FILE_LOCKING = v == "TRUE"
+
+ def _restore_locking_env(self):
+ self._set_locking_env(self._HDF5_FILE_LOCKING)
+ self._HDF5_FILE_LOCKING = None
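A minimal sketch of how these utilities are meant to be combined for reading
an HDF5 file that is still being written (file name, item names and timeout
are illustrative only):

.. code-block:: python

    from silx.io import h5py_utils

    @h5py_utils.retry(retry_timeout=10)
    def read_data(filename, name):
        # Retried on HDF5 read errors until success or timeout
        with h5py_utils.File(filename) as h5file:  # read-only by default
            return h5file[name][()]

    data = read_data("experiment.h5", "/1.1/measurement/data")

    # Retry-wrapped helpers are provided as well: by default only
    # groups containing an "end_time" dataset are returned
    names = h5py_utils.top_level_names("experiment.h5")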
diff --git a/silx/io/spech5.py b/silx/io/spech5.py
index 79fd2e4..1eaec7c 100644
--- a/silx/io/spech5.py
+++ b/silx/io/spech5.py
@@ -1,6 +1,6 @@
# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2018 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2021 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -194,7 +194,7 @@ import numpy
import six
from silx import version as silx_version
-from .specfile import SpecFile
+from .specfile import SpecFile, SfErrColNotFound
from . import commonh5
__authors__ = ["P. Knobel", "D. Naudet"]
@@ -670,6 +670,10 @@ class PositionersGroup(commonh5.Group, SpecH5Group):
def __init__(self, parent, scan):
commonh5.Group.__init__(self, name="positioners", parent=parent,
attrs={"NX_class": to_h5py_utf8("NXcollection")})
+
+        dataset_info = []  # List of positioner (name, value) pairs
+ is_error = False # True if error encountered
+
for motor_name in scan.motor_names:
safe_motor_name = motor_name.replace("/", "%")
if motor_name in scan.labels and scan.data.shape[0] > 0:
@@ -678,10 +682,24 @@ class PositionersGroup(commonh5.Group, SpecH5Group):
else:
# Take value from #P scan header.
# (may return float("inf") if #P line is missing from scan hdr)
- motor_value = scan.motor_position_by_name(motor_name)
- self.add_node(SpecH5NodeDataset(name=safe_motor_name,
- data=motor_value,
- parent=self))
+ try:
+ motor_value = scan.motor_position_by_name(motor_name)
+ except SfErrColNotFound:
+ is_error = True
+ motor_value = float('inf')
+ dataset_info.append((safe_motor_name, motor_value))
+
+        if is_error:  # Filter out scalar values
+ logger1.warning("Mismatching number of elements in #P and #O: Ignoring")
+ dataset_info = [
+ (name, value) for name, value in dataset_info
+ if not isinstance(value, float)]
+
+ for name, value in dataset_info:
+ self.add_node(SpecH5NodeDataset(
+ name=name,
+ data=value,
+ parent=self))
class InstrumentMcaGroup(commonh5.Group, SpecH5Group):
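The fallback above can be exercised through the h5py-like ``SpecH5`` API
(file name and scan number are illustrative only); motors whose value cannot
be resolved from a mismatching ``#P`` line are now skipped with a warning
instead of raising ``SfErrColNotFound``:

.. code-block:: python

    from silx.io.spech5 import SpecH5

    sfh5 = SpecH5("scan.dat")
    positioners = sfh5["/1.1/instrument/positioners"]
    for name in positioners:
        print(name, positioners[name][()])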
diff --git a/silx/io/test/__init__.py b/silx/io/test/__init__.py
index a309ee9..68b6e9b 100644
--- a/silx/io/test/__init__.py
+++ b/silx/io/test/__init__.py
@@ -40,6 +40,7 @@ from .test_nxdata import suite as test_nxdata_suite
from .test_commonh5 import suite as test_commonh5_suite
from .test_rawh5 import suite as test_rawh5_suite
from .test_url import suite as test_url_suite
+from .test_h5py_utils import suite as test_h5py_utils_suite
def suite():
@@ -56,4 +57,5 @@ def suite():
test_suite.addTest(test_commonh5_suite())
test_suite.addTest(test_rawh5_suite())
test_suite.addTest(test_url_suite())
+ test_suite.addTest(test_h5py_utils_suite())
return test_suite
diff --git a/silx/io/test/test_dictdump.py b/silx/io/test/test_dictdump.py
index b99116b..93c9183 100644
--- a/silx/io/test/test_dictdump.py
+++ b/silx/io/test/test_dictdump.py
@@ -33,6 +33,7 @@ import os
import tempfile
import unittest
import h5py
+from copy import deepcopy
from collections import defaultdict
@@ -72,7 +73,63 @@ link_attrs["links"]["absolute_softlink"] = h5py.SoftLink("/links/group/dataset")
link_attrs["links"]["external_link"] = h5py.ExternalLink(ext_filename, "/ext_group/dataset")
-class TestDictToH5(unittest.TestCase):
+class DictTestCase(unittest.TestCase):
+
+ def assertRecursiveEqual(self, expected, actual, nodes=tuple()):
+ err_msg = "\n\n Tree nodes: {}".format(nodes)
+ if isinstance(expected, dict):
+ self.assertTrue(isinstance(actual, dict), msg=err_msg)
+ self.assertEqual(
+ set(expected.keys()),
+ set(actual.keys()),
+ msg=err_msg
+ )
+ for k in actual:
+ self.assertRecursiveEqual(
+ expected[k],
+ actual[k],
+ nodes=nodes + (k,),
+ )
+ return
+ if isinstance(actual, numpy.ndarray):
+ actual = actual.tolist()
+ if isinstance(expected, numpy.ndarray):
+ expected = expected.tolist()
+
+ self.assertEqual(expected, actual, msg=err_msg)
+
+
+class H5DictTestCase(DictTestCase):
+
+ def _dictRoundTripNormalize(self, treedict):
+ """Convert the dictionary as expected from a round-trip
+ treedict -> dicttoh5 -> h5todict -> newtreedict
+ """
+ for key, value in list(treedict.items()):
+ if isinstance(value, dict):
+ self._dictRoundTripNormalize(value)
+
+ # Expand treedict[("group", "attr_name")]
+ # to treedict["group"]["attr_name"]
+ for key, value in list(treedict.items()):
+ if not isinstance(key, tuple):
+ continue
+ # Put the attribute inside the group
+ grpname, attr = key
+ if not grpname:
+ continue
+ group = treedict.setdefault(grpname, dict())
+ if isinstance(group, dict):
+ del treedict[key]
+ group[("", attr)] = value
+
+ def dictRoundTripNormalize(self, treedict):
+ treedict2 = deepcopy(treedict)
+ self._dictRoundTripNormalize(treedict2)
+ return treedict2
+
+
+class TestDictToH5(H5DictTestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5")
@@ -110,14 +167,13 @@ class TestDictToH5(unittest.TestCase):
min(ddict["city attributes"]["Europe"]["France"]["Grenoble"]["coordinates"]),
5.7196)
- def testH5Overwrite(self):
+ def testH5OverwriteDeprecatedApi(self):
dd = ConfigDict({'t': True})
dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a')
dd = ConfigDict({'t': False})
- with TestLogging(dictdump_logger, warning=1):
- dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a',
- overwrite_data=False)
+ dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a',
+ overwrite_data=False)
res = h5todict(self.h5_fname)
assert(res['t'] == True)
@@ -200,8 +256,7 @@ class TestDictToH5(unittest.TestCase):
("group", "attr"): 10,
}
with h5py.File(self.h5_fname, "w") as h5file:
- with TestLogging(dictdump_logger, warning=1):
- dictdump.dicttoh5(ddict, h5file)
+ dictdump.dicttoh5(ddict, h5file)
self.assertEqual(h5file["group"].attrs['attr'], 10)
def testFlatDict(self):
@@ -241,8 +296,223 @@ class TestDictToH5(unittest.TestCase):
numpy.testing.assert_array_equal(h5py_read_dataset(h5file["darks"]["0"]),
ddict['darks']['0'])
-
-class TestH5ToDict(unittest.TestCase):
+ def testOverwrite(self):
+ # Tree structure that will be tested
+ group1 = {
+ ("", "attr2"): "original2",
+ "dset1": 0,
+ "dset2": [0, 1],
+ ("dset1", "attr1"): "original1",
+ ("dset1", "attr2"): "original2",
+ ("dset2", "attr1"): "original1",
+ ("dset2", "attr2"): "original2",
+ }
+ group2 = {
+ "subgroup1": group1.copy(),
+ "subgroup2": group1.copy(),
+ ("subgroup1", "attr1"): "original1",
+ ("subgroup2", "attr1"): "original1"
+ }
+ group2.update(group1)
+ # initial HDF5 tree
+ otreedict = {
+ ('', 'attr1'): "original1",
+ ('', 'attr2'): "original2",
+ 'group1': group1,
+ 'group2': group2,
+ ('group1', 'attr1'): "original1",
+ ('group2', 'attr1'): "original1"
+ }
+ wtreedict = None # dumped dictionary
+ etreedict = None # expected HDF5 tree after dump
+
+ def reset_file():
+ dicttoh5(
+ otreedict,
+ h5file=self.h5_fname,
+ mode="w",
+ )
+
+ def append_file(update_mode):
+ dicttoh5(
+ wtreedict,
+ h5file=self.h5_fname,
+ mode="a",
+ update_mode=update_mode
+ )
+
+ def assert_file():
+ rtreedict = h5todict(
+ self.h5_fname,
+ include_attributes=True,
+ asarray=False
+ )
+ netreedict = self.dictRoundTripNormalize(etreedict)
+ try:
+ self.assertRecursiveEqual(netreedict, rtreedict)
+ except AssertionError:
+ from pprint import pprint
+ print("\nDUMP:")
+ pprint(wtreedict)
+ print("\nEXPECTED:")
+ pprint(netreedict)
+ print("\nHDF5:")
+ pprint(rtreedict)
+ raise
+
+ def assert_append(update_mode):
+ append_file(update_mode)
+ assert_file()
+
+ # Test wrong arguments
+ with self.assertRaises(ValueError):
+ dicttoh5(
+ otreedict,
+ h5file=self.h5_fname,
+ mode="w",
+ update_mode="wrong-value"
+ )
+
+ # No writing
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ assert_file()
+
+ # Write identical dictionary
+ wtreedict = deepcopy(otreedict)
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add", "modify", "replace"]:
+ assert_append(update_mode)
+
+ # Write empty dictionary
+ wtreedict = dict()
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add", "modify", "replace"]:
+ assert_append(update_mode)
+
+ # Modified dataset
+ wtreedict = dict()
+ wtreedict["group2"] = dict()
+ wtreedict["group2"]["subgroup2"] = dict()
+ wtreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]}
+ wtreedict["group2"]["subgroup2"]["dset2"] = [10, 20]
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add"]:
+ assert_append(update_mode)
+
+ etreedict["group2"]["subgroup2"]["dset2"] = [10, 20]
+ assert_append("modify")
+
+ etreedict["group2"] = dict()
+ del etreedict[("group2", "attr1")]
+ etreedict["group2"]["subgroup2"] = dict()
+ etreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]}
+ etreedict["group2"]["subgroup2"]["dset2"] = [10, 20]
+ assert_append("replace")
+
+ # Modified group
+ wtreedict = dict()
+ wtreedict["group2"] = dict()
+ wtreedict["group2"]["subgroup2"] = [0, 1]
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add", "modify"]:
+ assert_append(update_mode)
+
+ etreedict["group2"] = dict()
+ del etreedict[("group2", "attr1")]
+ etreedict["group2"]["subgroup2"] = [0, 1]
+ assert_append("replace")
+
+ # Modified attribute
+ wtreedict = dict()
+ wtreedict["group2"] = dict()
+ wtreedict["group2"]["subgroup2"] = dict()
+ wtreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified"
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add"]:
+ assert_append(update_mode)
+
+ etreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified"
+ assert_append("modify")
+
+ etreedict["group2"] = dict()
+ del etreedict[("group2", "attr1")]
+ etreedict["group2"]["subgroup2"] = dict()
+ etreedict["group2"]["subgroup2"]["dset1"] = dict()
+ etreedict["group2"]["subgroup2"]["dset1"][("", "attr1")] = "modified"
+ assert_append("replace")
+
+ # Delete group
+ wtreedict = dict()
+ wtreedict["group2"] = dict()
+ wtreedict["group2"]["subgroup2"] = None
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add"]:
+ assert_append(update_mode)
+
+ del etreedict["group2"]["subgroup2"]
+ del etreedict["group2"][("subgroup2", "attr1")]
+ assert_append("modify")
+
+ etreedict["group2"] = dict()
+ del etreedict[("group2", "attr1")]
+ assert_append("replace")
+
+ # Delete dataset
+ wtreedict = dict()
+ wtreedict["group2"] = dict()
+ wtreedict["group2"]["subgroup2"] = dict()
+ wtreedict["group2"]["subgroup2"]["dset2"] = None
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add"]:
+ assert_append(update_mode)
+
+ del etreedict["group2"]["subgroup2"]["dset2"]
+ del etreedict["group2"]["subgroup2"][("dset2", "attr1")]
+ del etreedict["group2"]["subgroup2"][("dset2", "attr2")]
+ assert_append("modify")
+
+ etreedict["group2"] = dict()
+ del etreedict[("group2", "attr1")]
+ etreedict["group2"]["subgroup2"] = dict()
+ assert_append("replace")
+
+ # Delete attribute
+ wtreedict = dict()
+ wtreedict["group2"] = dict()
+ wtreedict["group2"]["subgroup2"] = dict()
+ wtreedict["group2"]["subgroup2"][("dset2", "attr1")] = None
+
+ reset_file()
+ etreedict = deepcopy(otreedict)
+ for update_mode in [None, "add"]:
+ assert_append(update_mode)
+
+ del etreedict["group2"]["subgroup2"][("dset2", "attr1")]
+ assert_append("modify")
+
+ etreedict["group2"] = dict()
+ del etreedict[("group2", "attr1")]
+ etreedict["group2"]["subgroup2"] = dict()
+ etreedict["group2"]["subgroup2"]["dset2"] = dict()
+ assert_append("replace")
+
+
+class TestH5ToDict(H5DictTestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5")
@@ -313,7 +583,7 @@ class TestH5ToDict(unittest.TestCase):
numpy.testing.assert_array_equal(ddict[("", "attr_2utf8")], adict[("", "attr_2utf8")])
-class TestDictToNx(unittest.TestCase):
+class TestDictToNx(H5DictTestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.h5_fname = os.path.join(self.tempdir, "nx.h5")
@@ -416,8 +686,121 @@ class TestDictToNx(unittest.TestCase):
with h5py.File(self.h5_fname, "r") as h5file:
self.assertEqual(h5file["/links/group/subgroup/relative_softlink"][()], 10)
-
-class TestNxToDict(unittest.TestCase):
+ def testOverwrite(self):
+ entry_name = "entry"
+ wtreedict = {
+ "group1": {"a": 1, "b": 2},
+ "group2@attr3": "attr3",
+ "group2@attr4": "attr4",
+ "group2": {
+ "@attr1": "attr1",
+ "@attr2": "attr2",
+ "c": 3,
+ "d": 4,
+ "dataset4": 8,
+ "dataset4@units": "keV",
+ },
+ "group3": {"subgroup": {"e": 9, "f": 10}},
+ "dataset1": 5,
+ "dataset2": 6,
+ "dataset3": 7,
+ "dataset3@units": "mm",
+ }
+ esubtree = {
+ "@NX_class": "NXentry",
+ "group1": {"@NX_class": "NXcollection", "a": 1, "b": 2},
+ "group2": {
+ "@NX_class": "NXcollection",
+ "@attr1": "attr1",
+ "@attr2": "attr2",
+ "@attr3": "attr3",
+ "@attr4": "attr4",
+ "c": 3,
+ "d": 4,
+ "dataset4": 8,
+ "dataset4@units": "keV",
+ },
+ "group3": {
+ "@NX_class": "NXcollection",
+ "subgroup": {"@NX_class": "NXcollection", "e": 9, "f": 10},
+ },
+ "dataset1": 5,
+ "dataset2": 6,
+ "dataset3": 7,
+ "dataset3@units": "mm",
+ }
+ etreedict = {entry_name: esubtree}
+
+ def append_file(update_mode, add_nx_class):
+ dictdump.dicttonx(
+ wtreedict,
+ h5file=self.h5_fname,
+ mode="a",
+ h5path=entry_name,
+ update_mode=update_mode,
+ add_nx_class=add_nx_class
+ )
+
+ def assert_file():
+ rtreedict = dictdump.nxtodict(
+ self.h5_fname,
+ include_attributes=True,
+ asarray=False,
+ )
+ netreedict = self.dictRoundTripNormalize(etreedict)
+ try:
+ self.assertRecursiveEqual(netreedict, rtreedict)
+ except AssertionError:
+ from pprint import pprint
+ print("\nDUMP:")
+ pprint(wtreedict)
+ print("\nEXPECTED:")
+ pprint(netreedict)
+ print("\nHDF5:")
+ pprint(rtreedict)
+ raise
+
+ def assert_append(update_mode, add_nx_class=None):
+ append_file(update_mode, add_nx_class=add_nx_class)
+ assert_file()
+
+        # First write to an empty file
+ assert_append(None)
+
+ # Add non-existing attributes/datasets/groups
+ wtreedict["group1"].pop("a")
+ wtreedict["group2"].pop("@attr1")
+ wtreedict["group2"]["@attr2"] = "attr3" # only for update
+ wtreedict["group2"]["@type"] = "test"
+ wtreedict["group2"]["dataset4"] = 9 # only for update
+ del wtreedict["group2"]["dataset4@units"]
+ wtreedict["group3"] = {}
+ esubtree["group2"]["@type"] = "test"
+ assert_append("add")
+
+        # Add/update existing attributes and datasets
+ esubtree["group2"]["@attr2"] = "attr3"
+ esubtree["group2"]["dataset4"] = 9
+ assert_append("modify")
+
+ # Do not add missing NX_class by default when updating
+ wtreedict["group2"]["@NX_class"] = "NXprocess"
+ esubtree["group2"]["@NX_class"] = "NXprocess"
+ assert_append("modify")
+ del wtreedict["group2"]["@NX_class"]
+ assert_append("modify")
+
+ # Overwrite existing groups/datasets/attributes
+ esubtree["group1"].pop("a")
+ esubtree["group2"].pop("@attr1")
+ esubtree["group2"]["@NX_class"] = "NXcollection"
+ esubtree["group2"]["dataset4"] = 9
+ del esubtree["group2"]["dataset4@units"]
+ esubtree["group3"] = {"@NX_class": "NXcollection"}
+ assert_append("replace", add_nx_class=True)
+
+
+class TestNxToDict(H5DictTestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.h5_fname = os.path.join(self.tempdir, "nx.h5")
@@ -510,7 +893,7 @@ class TestNxToDict(unittest.TestCase):
h5todict(self.h5_fname, path="/Mars", errors='raise')
-class TestDictToJson(unittest.TestCase):
+class TestDictToJson(DictTestCase):
def setUp(self):
self.dir_path = tempfile.mkdtemp()
self.json_fname = os.path.join(self.dir_path, "cityattrs.json")
@@ -528,7 +911,7 @@ class TestDictToJson(unittest.TestCase):
self.assertIn('"inhabitants": 160215', json_content)
-class TestDictToIni(unittest.TestCase):
+class TestDictToIni(DictTestCase):
def setUp(self):
self.dir_path = tempfile.mkdtemp()
self.ini_fname = os.path.join(self.dir_path, "test.ini")
diff --git a/silx/io/test/test_h5py_utils.py b/silx/io/test/test_h5py_utils.py
new file mode 100644
index 0000000..2e2e3dd
--- /dev/null
+++ b/silx/io/test/test_h5py_utils.py
@@ -0,0 +1,397 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""Tests for h5py utilities"""
+
+__authors__ = ["W. de Nolf"]
+__license__ = "MIT"
+__date__ = "27/01/2020"
+
+
+import unittest
+import os
+import sys
+import time
+import shutil
+import tempfile
+import threading
+import multiprocessing
+from contextlib import contextmanager
+
+from .. import h5py_utils
+from ...utils.retry import RetryError, RetryTimeoutError
+
+IS_WINDOWS = sys.platform == "win32"
+
+
+def _subprocess_context_main(queue, contextmgr, *args, **kw):
+ try:
+ with contextmgr(*args, **kw):
+ queue.put(None)
+ threading.Event().wait()
+ except Exception:
+ queue.put(None)
+ raise
+
+
+@contextmanager
+def _subprocess_context(contextmgr, *args, **kw):
+ timeout = kw.pop("timeout", 10)
+ queue = multiprocessing.Queue(maxsize=1)
+ p = multiprocessing.Process(
+ target=_subprocess_context_main, args=(queue, contextmgr) + args, kwargs=kw
+ )
+ p.start()
+ try:
+ queue.get(timeout=timeout)
+ yield
+ finally:
+ try:
+ p.kill()
+ except AttributeError:
+ p.terminate()
+ p.join(timeout)
+
+
+@contextmanager
+def _open_context(filename, **kw):
+ with h5py_utils.File(filename, **kw) as f:
+ if kw.get("mode") == "w":
+ f["check"] = True
+ f.flush()
+ yield f
+
+
+def _cause_segfault():
+ import ctypes
+
+ i = ctypes.c_char(b"a")
+ j = ctypes.pointer(i)
+ c = 0
+ while True:
+ j[c] = b"a"
+ c += 1
+
+
+def _top_level_names_test(txtfilename, *args, **kw):
+ sys.stderr = open(os.devnull, "w")
+
+ with open(txtfilename, mode="r") as f:
+ failcounter = int(f.readline().strip())
+
+ ncausefailure = kw.pop("ncausefailure")
+ faildelay = kw.pop("faildelay")
+ if failcounter < ncausefailure:
+ time.sleep(faildelay)
+ failcounter += 1
+ with open(txtfilename, mode="w") as f:
+ f.write(str(failcounter))
+ if failcounter % 2:
+ raise RetryError
+ else:
+ _cause_segfault()
+ return h5py_utils._top_level_names(*args, **kw)
+
+
+top_level_names_test = h5py_utils.retry_in_subprocess()(_top_level_names_test)
+
+
+def subtests(test):
+ def wrapper(self):
+ for _ in self._subtests():
+ with self.subTest(**self._subtest_options):
+ test(self)
+
+ return wrapper
+
+
+class TestH5pyUtils(unittest.TestCase):
+ def setUp(self):
+ self.test_dir = tempfile.mkdtemp()
+
+ def tearDown(self):
+ shutil.rmtree(self.test_dir)
+
+ def _subtests(self):
+ self._subtest_options = {"mode": "w"}
+ self.filename_generator = self._filenames()
+ yield
+ self._subtest_options = {"mode": "w", "libver": "latest"}
+ self.filename_generator = self._filenames()
+ yield
+
+ @property
+    def _libver_allows_concurrent_access(self):
+ return self._subtest_options.get("libver") in [None, "earliest", "v18"]
+
+ def _filenames(self):
+ i = 1
+ while True:
+ filename = os.path.join(self.test_dir, "file{}.h5".format(i))
+ with self._open_context(filename):
+ pass
+ yield filename
+ i += 1
+
+ def _new_filename(self):
+ return next(self.filename_generator)
+
+ @contextmanager
+ def _open_context(self, filename, **kwargs):
+ kw = self._subtest_options
+ kw.update(kwargs)
+        with _open_context(filename, **kw) as f:
+            yield f
+
+ @contextmanager
+ def _open_context_subprocess(self, filename, **kwargs):
+ kw = self._subtest_options
+ kw.update(kwargs)
+ with _subprocess_context(_open_context, filename, **kw):
+ yield
+
+ def _assert_hdf5_data(self, f):
+ self.assertTrue(f["check"][()])
+
+ def _validate_hdf5_data(self, filename, swmr=False):
+ with self._open_context(filename, mode="r") as f:
+ self.assertEqual(f.swmr_mode, swmr)
+ self._assert_hdf5_data(f)
+
+ @subtests
+ def test_modes_single_process(self):
+ orig = os.environ.get("HDF5_USE_FILE_LOCKING")
+ filename1 = self._new_filename()
+ self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+ filename2 = self._new_filename()
+ self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+ with self._open_context(filename1, mode="r"):
+ with self._open_context(filename2, mode="r"):
+ pass
+ for mode in ["w", "a"]:
+ with self.assertRaises(RuntimeError):
+ with self._open_context(filename2, mode=mode):
+ pass
+ self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+ with self._open_context(filename1, mode="a"):
+ for mode in ["w", "a"]:
+ with self._open_context(filename2, mode=mode):
+ pass
+ with self.assertRaises(RuntimeError):
+ with self._open_context(filename2, mode="r"):
+ pass
+ self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+
+ @subtests
+ def test_modes_multi_process(self):
+        if not self._libver_allows_concurrent_access:
+            # A concurrent reader with HDF5_USE_FILE_LOCKING=FALSE
+            # no longer works with HDF5 >= 1.10 (an exception is
+            # raised when trying to open the file)
+ return
+ filename = self._new_filename()
+
+ # File open by truncating writer
+ with self._open_context_subprocess(filename, mode="w"):
+ with self._open_context(filename, mode="r") as f:
+ self._assert_hdf5_data(f)
+ if IS_WINDOWS:
+ with self._open_context(filename, mode="a") as f:
+ self._assert_hdf5_data(f)
+ else:
+ with self.assertRaises(OSError):
+ with self._open_context(filename, mode="a") as f:
+ pass
+ self._validate_hdf5_data(filename)
+
+ # File open by appending writer
+ with self._open_context_subprocess(filename, mode="a"):
+ with self._open_context(filename, mode="r") as f:
+ self._assert_hdf5_data(f)
+ if IS_WINDOWS:
+ with self._open_context(filename, mode="a") as f:
+ self._assert_hdf5_data(f)
+ else:
+ with self.assertRaises(OSError):
+ with self._open_context(filename, mode="a") as f:
+ pass
+ self._validate_hdf5_data(filename)
+
+ # File open by reader
+ with self._open_context_subprocess(filename, mode="r"):
+ with self._open_context(filename, mode="r") as f:
+ self._assert_hdf5_data(f)
+ with self._open_context(filename, mode="a") as f:
+ pass
+ self._validate_hdf5_data(filename)
+
+ # File open by locking reader
+ with _subprocess_context(
+ _open_context, filename, mode="r", enable_file_locking=True
+ ):
+ with self._open_context(filename, mode="r") as f:
+ self._assert_hdf5_data(f)
+ if IS_WINDOWS:
+ with self._open_context(filename, mode="a") as f:
+ self._assert_hdf5_data(f)
+ else:
+ with self.assertRaises(OSError):
+ with self._open_context(filename, mode="a") as f:
+ pass
+ self._validate_hdf5_data(filename)
+
+ @subtests
+ @unittest.skipIf(not h5py_utils.HAS_SWMR, "SWMR not supported")
+ def test_modes_multi_process_swmr(self):
+ filename = self._new_filename()
+
+ with self._open_context(filename, mode="w", libver="latest") as f:
+ pass
+
+ # File open by SWMR writer
+ with self._open_context_subprocess(filename, mode="a", swmr=True):
+ with self._open_context(filename, mode="r") as f:
+ assert f.swmr_mode
+ self._assert_hdf5_data(f)
+ with self.assertRaises(OSError):
+ with self._open_context(filename, mode="a") as f:
+ pass
+ self._validate_hdf5_data(filename, swmr=True)
+
+ @subtests
+ def test_retry_defaults(self):
+ filename = self._new_filename()
+
+ names = h5py_utils.top_level_names(filename)
+ self.assertEqual(names, [])
+
+ names = h5py_utils.safe_top_level_names(filename)
+ self.assertEqual(names, [])
+
+ names = h5py_utils.top_level_names(filename, include_only=None)
+ self.assertEqual(names, ["check"])
+
+ names = h5py_utils.safe_top_level_names(filename, include_only=None)
+ self.assertEqual(names, ["check"])
+
+ with h5py_utils.open_item(filename, "/check", validate=lambda x: False) as item:
+ self.assertEqual(item, None)
+
+ with h5py_utils.open_item(filename, "/check", validate=None) as item:
+ self.assertTrue(item[()])
+
+ with self.assertRaises(RetryTimeoutError):
+ with h5py_utils.open_item(
+ filename,
+ "/check",
+ retry_timeout=0.1,
+ retry_invalid=True,
+ validate=lambda x: False,
+ ) as item:
+ pass
+
+ ncall = 0
+
+ def validate(item):
+ nonlocal ncall
+ if ncall >= 1:
+ return True
+ else:
+ ncall += 1
+ raise RetryError
+
+ with h5py_utils.open_item(
+ filename, "/check", validate=validate, retry_timeout=1, retry_invalid=True
+ ) as item:
+ self.assertTrue(item[()])
+
+ @subtests
+ def test_retry_custom(self):
+ filename = self._new_filename()
+ ncausefailure = 3
+ faildelay = 0.1
+ sufficient_timeout = ncausefailure * (faildelay + 10)
+ insufficient_timeout = ncausefailure * faildelay * 0.5
+
+ @h5py_utils.retry_contextmanager()
+ def open_item(filename, name):
+ nonlocal failcounter
+ if failcounter < ncausefailure:
+ time.sleep(faildelay)
+ failcounter += 1
+ raise RetryError
+ with h5py_utils.File(filename) as h5file:
+ yield h5file[name]
+
+ failcounter = 0
+ kw = {"retry_timeout": sufficient_timeout}
+ with open_item(filename, "/check", **kw) as item:
+ self.assertTrue(item[()])
+
+ failcounter = 0
+ kw = {"retry_timeout": insufficient_timeout}
+ with self.assertRaises(RetryTimeoutError):
+ with open_item(filename, "/check", **kw) as item:
+ pass
+
+ @subtests
+ def test_retry_in_subprocess(self):
+ filename = self._new_filename()
+ txtfilename = os.path.join(self.test_dir, "failcounter.txt")
+ ncausefailure = 3
+ faildelay = 0.1
+ sufficient_timeout = ncausefailure * (faildelay + 10)
+ insufficient_timeout = ncausefailure * faildelay * 0.5
+
+ kw = {
+ "retry_timeout": sufficient_timeout,
+ "include_only": None,
+ "ncausefailure": ncausefailure,
+ "faildelay": faildelay,
+ }
+ with open(txtfilename, mode="w") as f:
+ f.write("0")
+ names = top_level_names_test(txtfilename, filename, **kw)
+ self.assertEqual(names, ["check"])
+
+ kw = {
+ "retry_timeout": insufficient_timeout,
+ "include_only": None,
+ "ncausefailure": ncausefailure,
+ "faildelay": faildelay,
+ }
+ with open(txtfilename, mode="w") as f:
+ f.write("0")
+ with self.assertRaises(RetryTimeoutError):
+ top_level_names_test(txtfilename, filename, **kw)
+
+
+def suite():
+ test_suite = unittest.TestSuite()
+ test_suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestH5pyUtils))
+ return test_suite
+
+
+if __name__ == "__main__":
+ unittest.main(defaultTest="suite")
diff --git a/silx/io/url.py b/silx/io/url.py
index 044977c..66b75f0 100644
--- a/silx/io/url.py
+++ b/silx/io/url.py
@@ -344,6 +344,8 @@ class DataUrl(object):
:rtype: bool
"""
file_path = self.file_path()
+ if file_path is None:
+ return False
if len(file_path) > 0:
if file_path[0] == "/":
return True
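A short sketch of the behaviour guarded by this check (the URL content is
illustrative only): a ``DataUrl`` created without a file path now reports
``is_absolute() == False`` instead of raising ``TypeError`` on ``len(None)``:

.. code-block:: python

    from silx.io.url import DataUrl

    url = DataUrl(data_path="/group/dataset")  # no file_path given
    assert url.file_path() is None
    assert url.is_absolute() is False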