New upstream version 0.15.2+dfsg

author: Picca Frédéric-Emmanuel <picca@debian.org> 2021-09-07 14:39:36 +0200
committer: Picca Frédéric-Emmanuel <picca@debian.org> 2021-09-07 14:39:36 +0200
commit: d3194b1a9c4404ba93afac43d97172ab24c57098 (patch)
tree: a1604130e1401dc1cbd084518ed72869dc92b86f /silx/io
parent: b3bea947efa55d2c0f198b6c6795b3177be27f45 (diff)
7 files changed, 1367 insertions, 95 deletions
diff --git a/silx/io/dictdump.py b/silx/io/dictdump.py
index bbb244a..e907668 100644
--- a/silx/io/dictdump.py
+++ b/silx/io/dictdump.py
@@ -26,6 +26,7 @@ by text strings to following file formats: `HDF5, INI, JSON`
 """
 
 from collections import OrderedDict
+from collections.abc import Mapping
 import json
 import logging
 import numpy
@@ -34,11 +35,16 @@ import sys
 import h5py
 
 from .configdict import ConfigDict
-from .utils import is_group, is_link, is_softlink, is_externallink
+from .utils import is_group
+from .utils import is_dataset
+from .utils import is_link
+from .utils import is_softlink
+from .utils import is_externallink
 from .utils import is_file as is_h5_file_like
 from .utils import open as h5open
 from .utils import h5py_read_dataset
 from .utils import H5pyAttributesReadWrapper
+from silx.utils.deprecation import deprecated_warning
 
 __authors__ = ["P. Knobel"]
 __license__ = "MIT"
@@ -66,7 +72,7 @@ def _prepare_hdf5_write_value(array_like):
         return array
 
 
-class _SafeH5FileWrite(object):
+class _SafeH5FileWrite:
     """Context manager returning a :class:`h5py.File` object.
 
     If this object is initialized with a file path, we open the file
@@ -82,7 +88,6 @@ class _SafeH5FileWrite(object):
     """
     def __init__(self, h5file, mode="w"):
         """
-
         :param h5file:  HDF5 file path or :class:`h5py.File` instance
         :param str mode:  Can be ``"r+"`` (read/write, file must exist),
             ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
@@ -106,7 +111,7 @@ class _SafeH5FileWrite(object):
             self.h5file.close()
 
 
-class _SafeH5FileRead(object):
+class _SafeH5FileRead:
     """Context manager returning a :class:`h5py.File` or a
     :class:`silx.io.spech5.SpecH5` or a :class:`silx.io.fabioh5.File` object.
 
@@ -136,18 +141,48 @@ class _SafeH5FileRead(object):
             self.h5file.close()
 
 
+def _normalize_h5_path(h5root, h5path):
+    """
+    :param h5root: File name or h5py-like File, Group or Dataset
+    :param str h5path: relative to ``h5root``
+    :returns 2-tuple: (file name or h5py-like File object, normalized h5path)
+    """
+    if is_group(h5root):
+        group_name = h5root.name
+        if group_name == "/":
+            pass
+        elif h5path:
+            h5path = group_name + "/" + h5path
+        else:
+            h5path = group_name
+        h5file = h5root.file
+    elif is_dataset(h5root):
+        h5path = h5root.name
+        h5file = h5root.file
+    else:
+        h5file = h5root
+    if not h5path:
+        h5path = "/"
+    elif not h5path.endswith("/"):
+        h5path += "/"
+    return h5file, h5path
+
+
 def dicttoh5(treedict, h5file, h5path='/',
-             mode="w", overwrite_data=False,
-             create_dataset_args=None):
+             mode="w", overwrite_data=None,
+             create_dataset_args=None, update_mode=None):
     """Write a nested dictionary to a HDF5 file, using keys as member names.
 
     If a dictionary value is a sub-dictionary, a group is created. If it is
     any other data type, it is cast into a numpy array and written as a
     :mod:`h5py` dataset. Dictionary keys must be strings and cannot contain
     the ``/`` character.
-    
+
     If dictionary keys are tuples they are interpreted to set h5 attributes.
-    The tuples should have the format (dataset_name,attr_name)
+    The tuples should have the format (dataset_name, attr_name).
+
+    Existing HDF5 items can be deleted by providing the dictionary value
+    ``None``, provided that ``update_mode in ["modify", "replace"]``.
 
     .. note::
 
@@ -158,21 +193,29 @@ def dicttoh5(treedict, h5file, h5path='/',
         to define sub trees. If tuples are used as keys they should have the
         format (dataset_name,attr_name) and will add a 5h attribute with the
         corresponding value.
-    :param h5file: HDF5 file name or handle. If a file name is provided, the
-        function opens the file in the specified mode and closes it again
-        before completing.
-    :param h5path: Target path in HDF5 file in which scan groups are created.
+    :param h5file: File name or h5py-like File, Group or Dataset
+    :param h5path: Target path in the HDF5 file relative to ``h5file``.
         Default is root (``"/"``)
     :param mode: Can be ``"r+"`` (read/write, file must exist),
         ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
         exists) or ``"a"`` (read/write if exists, create otherwise).
         This parameter is ignored if ``h5file`` is a file handle.
-    :param overwrite_data: If ``True``, existing groups and datasets can be
-        overwritten, if ``False`` they are skipped. This parameter is only
-        relevant if ``h5file_mode`` is ``"r+"`` or ``"a"``.
+    :param overwrite_data: Deprecated. ``True`` is approximately equivalent
+        to ``update_mode="modify"`` and ``False`` is equivalent to
+        ``update_mode="add"``.
     :param create_dataset_args: Dictionary of args you want to pass to
         ``h5f.create_dataset``. This allows you to specify filters and
         compression parameters. Don't specify ``name`` and ``data``.
+    :param update_mode: Can be ``add`` (default), ``modify`` or ``replace``.
+
+        * ``add``: Extend the existing HDF5 tree when possible. Existing HDF5
+            items (groups, datasets and attributes) remain untouched.
+        * ``modify``: Extend the existing HDF5 tree when possible, modify
+            existing attributes, modify same-sized dataset values and delete
+            HDF5 items with a ``None`` value in the dict tree.
+        * ``replace``: Replace the existing HDF5 tree. Items from the root of
+            the HDF5 tree that are not present in the root of the dict tree
+            will remain untouched.
 
     Example::
 
@@ -201,44 +244,110 @@ def dicttoh5(treedict, h5file, h5path='/',
                  create_dataset_args=create_ds_args)
     """
 
-    if not h5path.endswith("/"):
-        h5path += "/"
+    if overwrite_data is not None:
+        reason = (
+            "`overwrite_data=True` becomes `update_mode='modify'` and "
+            "`overwrite_data=False` becomes `update_mode='add'`"
+        )
+        deprecated_warning(
+            type_="argument",
+            name="overwrite_data",
+            reason=reason,
+            replacement="update_mode",
+            since_version="0.15",
+        )
+
+    if update_mode is None:
+        if overwrite_data:
+            update_mode = "modify"
+        else:
+            update_mode = "add"
+    else:
+        valid_existing_values = ("add", "replace", "modify")
+        if update_mode not in valid_existing_values:
+            raise ValueError((
+                "Argument 'update_mode' can only have values: {}"
+                "".format(valid_existing_values)
+            ))
+        if overwrite_data is not None:
+            logger.warning("The argument `overwrite_data` is ignored")
 
-    with _SafeH5FileWrite(h5file, mode=mode) as h5f:
-        if isinstance(treedict, dict) and h5path != "/":
-            if h5path not in h5f:
-                h5f.create_group(h5path)
+    if not isinstance(treedict, Mapping):
+        raise TypeError("'treedict' must be a dictionary")
 
-        for key in filter(lambda k: not isinstance(k, tuple), treedict):
-            key_is_group = isinstance(treedict[key], dict)
-            h5name = h5path + key
+    h5file, h5path = _normalize_h5_path(h5file, h5path)
 
-            if key_is_group and treedict[key]:
-                # non-empty group: recurse
-                dicttoh5(treedict[key], h5f, h5name,
-                         overwrite_data=overwrite_data,
-                         create_dataset_args=create_dataset_args)
-                continue
+    def _iter_treedict(attributes=False):
+        nonlocal treedict
+        for key, value in treedict.items():
+            if isinstance(key, tuple) == attributes:
+                yield key, value
 
-            if h5name in h5f:
-                # key already exists: delete or skip
-                if overwrite_data is True:
-                    del h5f[h5name]
+    change_allowed = update_mode in ("replace", "modify")
+
+    with _SafeH5FileWrite(h5file, mode=mode) as h5f:
+        # Create the root of the tree
+        if h5path in h5f:
+            if not is_group(h5f[h5path]):
+                if update_mode == "replace":
+                    del h5f[h5path]
+                    h5f.create_group(h5path)
                 else:
-                    logger.warning('key (%s) already exists. '
-                                    'Not overwriting.' % (h5name))
-                    continue
+                    return
+        else:
+            h5f.create_group(h5path)
 
-            value = treedict[key]
+        # Loop over all groups, links and datasets
+        for key, value in _iter_treedict(attributes=False):
+            h5name = h5path + key
+            exists = h5name in h5f
 
-            if value is None or key_is_group:
-                # Create empty group
-                h5f.create_group(h5name)
+            if value is None:
+                # Delete HDF5 item
+                if exists and change_allowed:
+                    del h5f[h5name]
+                    exists = False
+            elif isinstance(value, Mapping):
+                # HDF5 group
+                if exists and update_mode == "replace":
+                    del h5f[h5name]
+                    exists = False
+                if value:
+                    dicttoh5(value, h5f, h5name,
+                             update_mode=update_mode,
+                             create_dataset_args=create_dataset_args)
+                elif not exists:
+                    h5f.create_group(h5name)
             elif is_link(value):
-                h5f[h5name] = value
+                # HDF5 link
+                if exists and update_mode == "replace":
+                    del h5f[h5name]
+                    exists = False
+                if not exists:
+                    # Create link from h5py link object
+                    h5f[h5name] = value
             else:
+                # HDF5 dataset
+                if exists and not change_allowed:
+                    continue
                 data = _prepare_hdf5_write_value(value)
-                # can't apply filters on scalars (datasets with shape == () )
+
+                # Edit the existing dataset
+                attrs_backup = None
+                if exists:
+                    try:
+                        h5f[h5name][()] = data
+                        continue
+                    except Exception:
+                        # Delete the existing dataset
+                        if update_mode != "replace":
+                            if not is_dataset(h5f[h5name]):
+                                continue
+                            attrs_backup = dict(h5f[h5name].attrs)
+                        del h5f[h5name]
+
+                # Create dataset
+                # can't apply filters on scalars (datasets with shape == ())
                 if data.shape == () or create_dataset_args is None:
                     h5f.create_dataset(h5name,
                                        data=data)
@@ -246,36 +355,58 @@ def dicttoh5(treedict, h5file, h5path='/',
                     h5f.create_dataset(h5name,
                                        data=data,
                                        **create_dataset_args)
+                if attrs_backup:
+                    h5f[h5name].attrs.update(attrs_backup)
 
-        # deal with h5 attributes which have tuples as keys in treedict
-        for key in filter(lambda k: isinstance(k, tuple), treedict):
-            assert len(key) == 2, "attribute must be defined by 2 values"
+        # Loop over all attributes
+        for key, value in _iter_treedict(attributes=True):
+            if len(key) != 2:
+                raise ValueError("HDF5 attribute must be described by 2 values")
             h5name = h5path + key[0]
             attr_name = key[1]
 
             if h5name not in h5f:
-                # Create empty group if key for attr does not exist
+                # Create an empty group to store the attribute
                 h5f.create_group(h5name)
-                logger.warning(
-                    "key (%s) does not exist. attr %s "
-                    "will be written to ." % (h5name, attr_name)
-                )
-
-            if attr_name in h5f[h5name].attrs:
-                if not overwrite_data:
-                    logger.warning(
-                        "attribute %s@%s already exists. Not overwriting."
-                        "" % (h5name, attr_name)
-                    )
+
+            h5a = h5f[h5name].attrs
+            exists = attr_name in h5a
+
+            if value is None:
+                # Delete HDF5 attribute
+                if exists and change_allowed:
+                    del h5a[attr_name]
+                    exists = False
+            else:
+                # Add/modify HDF5 attribute
+                if exists and not change_allowed:
                     continue
+                data = _prepare_hdf5_write_value(value)
+                h5a[attr_name] = data
 
-            # Write attribute
-            value = treedict[key]
-            data = _prepare_hdf5_write_value(value)
-            h5f[h5name].attrs[attr_name] = data
+
+def _has_nx_class(treedict, key=""):
+    return key + "@NX_class" in treedict or \
+           (key, "NX_class") in treedict
+
+
+def _ensure_nx_class(treedict, parents=tuple()):
+    """Each group needs an "NX_class" attribute.
+    """
+    if _has_nx_class(treedict):
+        return
+    nparents = len(parents)
+    if nparents == 0:
+        treedict[("", "NX_class")] = "NXroot"
+    elif nparents == 1:
+        treedict[("", "NX_class")] = "NXentry"
+    else:
+        treedict[("", "NX_class")] = "NXcollection"
 
 
-def nexus_to_h5_dict(treedict, parents=tuple()):
+def nexus_to_h5_dict(
+    treedict, parents=tuple(), add_nx_class=True, has_nx_class=False
+):
     """The following conversions are applied:
         * key with "{name}@{attr_name}" notation: key converted to 2-tuple
         * key with ">{url}" notation: strip ">" and convert value to
@@ -286,14 +417,20 @@ def nexus_to_h5_dict(treedict, parents=tuple()):
          to define sub tree. The ``"@"`` character is used to write attributes.
          The ``">"`` prefix is used to define links.
     :param parents: Needed to resolve up-links (tuple of HDF5 group names)
+    :param add_nx_class: Add "NX_class" attribute when missing
+    :param has_nx_class: The "NX_class" attribute is defined in the parent
 
     :rtype dict:
     """
+    if not isinstance(treedict, Mapping):
+        raise TypeError("'treedict' must be a dictionary")
     copy = dict()
     for key, value in treedict.items():
         if "@" in key:
+            # HDF5 attribute
             key = tuple(key.rsplit("@", 1))
         elif key.startswith(">"):
+            # HDF5 link
             if isinstance(value, str):
                 key = key[1:]
                 first, sep, second = value.partition("::")
@@ -314,10 +451,19 @@ def nexus_to_h5_dict(treedict, parents=tuple()):
                     value = h5py.SoftLink(first)
             elif is_link(value):
                 key = key[1:]
-        if isinstance(value, dict):
-            copy[key] = nexus_to_h5_dict(value, parents=parents+(key,))
+        if isinstance(value, Mapping):
+            # HDF5 group
+            key_has_nx_class = add_nx_class and _has_nx_class(treedict, key)
+            copy[key] = nexus_to_h5_dict(
+                value,
+                parents=parents+(key,),
+                add_nx_class=add_nx_class,
+                has_nx_class=key_has_nx_class)
         else:
+            # HDF5 dataset or link
             copy[key] = value
+    if add_nx_class and not has_nx_class:
+        _ensure_nx_class(copy, parents)
     return copy
 
 
@@ -336,7 +482,8 @@ def h5_to_nexus_dict(treedict):
     copy = dict()
     for key, value in treedict.items():
         if isinstance(key, tuple):
-            assert len(key)==2, "attribute must be defined by 2 values"
+            if len(key) != 2:
+                raise ValueError("HDF5 attribute must be described by 2 values")
             key = "%s@%s" % (key[0], key[1])
         elif is_softlink(value):
             key = ">" + key
@@ -344,7 +491,7 @@ def h5_to_nexus_dict(treedict):
         elif is_externallink(value):
             key = ">" + key
             value = value.filename + "::" + value.path
-        if isinstance(value, dict):
+        if isinstance(value, Mapping):
             copy[key] = h5_to_nexus_dict(value)
         else:
             copy[key] = value
@@ -414,10 +561,8 @@ def h5todict(h5file,
         scalars). In some cases, you may find that a list of heterogeneous
         data types is converted to a numpy array of strings.
 
-    :param h5file: File name or :class:`h5py.File` object or spech5 file or
-        fabioh5 file.
-    :param str path: Name of HDF5 group to use as dictionary root level,
-        to read only a sub-group in the file
+    :param h5file: File name or h5py-like File, Group or Dataset
+    :param str path: Target path in the HDF5 file relative to ``h5file``
     :param List[str] exclude_names: Groups and datasets whose name contains
         a string in this list will be ignored. Default is None (ignore nothing)
     :param bool asarray: True (default) to read scalar as arrays, False to
@@ -431,6 +576,7 @@ def h5todict(h5file,
         - 'ignore': Ignore errors
     :return: Nested dictionary
     """
+    h5file, path = _normalize_h5_path(h5file, path)
     with _SafeH5FileRead(h5file) as h5f:
         ddict = {}
         if path not in h5f:
@@ -508,7 +654,7 @@ def h5todict(h5file,
     return ddict
 
 
-def dicttonx(treedict, h5file, h5path="/", **kw):
+def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw):
     """
     Write a nested dictionary to a HDF5 file, using string keys as member names.
     The NeXus convention is used to identify attributes with ``"@"`` character,
@@ -521,6 +667,8 @@ def dicttonx(treedict, h5file, h5path="/", **kw):
          and array-like objects as leafs. The ``"/"`` character can be used
          to define sub tree. The ``"@"`` character is used to write attributes.
          The ``">"`` prefix is used to define links.
+    :param add_nx_class: Add "NX_class" attribute when missing. By default it
+        is ``True`` when ``update_mode`` is ``"add"`` or ``None``.
 
     The named parameters are passed to dicttoh5.
 
@@ -557,12 +705,17 @@ def dicttonx(treedict, h5file, h5path="/", **kw):
 
         dicttonx(gauss,"test.h5")
     """
+    h5file, h5path = _normalize_h5_path(h5file, h5path)
     parents = tuple(p for p in h5path.split("/") if p)
-    nxtreedict = nexus_to_h5_dict(treedict, parents=parents)
+    if add_nx_class is None:
+        add_nx_class = kw.get("update_mode", None) in (None, "add")
+    nxtreedict = nexus_to_h5_dict(
+        treedict, parents=parents, add_nx_class=add_nx_class
+    )
     dicttoh5(nxtreedict, h5file, h5path=h5path, **kw)
 
 
-def nxtodict(h5file, **kw):
+def nxtodict(h5file, include_attributes=True, **kw):
     """Read a HDF5 file and return a nested dictionary with the complete file
     structure and all data.
 
@@ -571,7 +724,7 @@ def nxtodict(h5file, **kw):
 
     The named parameters are passed to h5todict.
     """
-    nxtreedict = h5todict(h5file, **kw)
+    nxtreedict = h5todict(h5file, include_attributes=include_attributes, **kw)
     return h5_to_nexus_dict(nxtreedict)
 
 
diff --git a/silx/io/h5py_utils.py b/silx/io/h5py_utils.py
new file mode 100644
index 0000000..cbdb44a
--- /dev/null
+++ b/silx/io/h5py_utils.py
@@ -0,0 +1,317 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2016-2021 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""
+This module provides utility methods on top of h5py, mainly to handle
+parallel writing and reading.
+"""
+
+__authors__ = ["W. de Nolf"]
+__license__ = "MIT"
+__date__ = "27/01/2020"
+
+
+import os
+import traceback
+import h5py
+
+from .._version import calc_hexversion
+from ..utils import retry as retry_mod
+
+H5PY_HEX_VERSION = calc_hexversion(*h5py.version.version_tuple[:3])
+HDF5_HEX_VERSION = calc_hexversion(*h5py.version.hdf5_version_tuple[:3])
+
+HDF5_SWMR_VERSION = calc_hexversion(*h5py.get_config().swmr_min_hdf5_version[:3])
+HDF5_TRACK_ORDER_VERSION = calc_hexversion(2, 9, 0)
+
+HAS_SWMR = HDF5_HEX_VERSION >= HDF5_SWMR_VERSION
+HAS_TRACK_ORDER = H5PY_HEX_VERSION >= HDF5_TRACK_ORDER_VERSION
+
+
+def _is_h5py_exception(e):
+    for frame in traceback.walk_tb(e.__traceback__):
+        if frame[0].f_locals.get("__package__", None) == "h5py":
+            return True
+    return False
+
+
+def _retry_h5py_error(e):
+    """
+    :param BaseException e:
+    :returns bool:
+    """
+    if _is_h5py_exception(e):
+        if isinstance(e, (OSError, RuntimeError)):
+            return True
+        elif isinstance(e, KeyError):
+            # For example this needs to be retried:
+            # KeyError: 'Unable to open object (bad object header version number)'
+            return "Unable to open object" in str(e)
+    elif isinstance(e, retry_mod.RetryError):
+        return True
+    return False
+
+
+def retry(**kw):
+    """Decorator for a method that needs to be executed until it no longer
+    fails on HDF5 IO. Mainly used for reading an HDF5 file that is being
+    written.
+
+    :param \**kw: see `silx.utils.retry`
+    """
+    kw.setdefault("retry_on_error", _retry_h5py_error)
+    return retry_mod.retry(**kw)
+
+
+def retry_contextmanager(**kw):
+    """Decorator to make a context manager from a method that needs to be
+    entered until it no longer fails on HDF5 IO. Mainly used for reading
+    an HDF5 file that is being written.
+
+    :param \**kw: see `silx.utils.retry_contextmanager`
+    """
+    kw.setdefault("retry_on_error", _retry_h5py_error)
+    return retry_mod.retry_contextmanager(**kw)
+
+
+def retry_in_subprocess(**kw):
+    """Same as `retry` but it also retries segmentation faults.
+
+    On Windows you cannot use this decorator with the "@" syntax:
+
+    .. code-block:: python
+
+        def _method(*args, **kw):
+            ...
+
+        method = retry_in_subprocess()(_method)
+
+    :param \**kw: see `silx.utils.retry_in_subprocess`
+    """
+    kw.setdefault("retry_on_error", _retry_h5py_error)
+    return retry_mod.retry_in_subprocess(**kw)
+
+
+def group_has_end_time(h5item):
+    """Returns True when the HDF5 item is a Group with an "end_time"
+    dataset. A reader can use this as an indication that the Group
+    has been fully written (at least if the writer supports this).
+
+    :param Union[h5py.Group,h5py.Dataset] h5item:
+    :returns bool:
+    """
+    if isinstance(h5item, h5py.Group):
+        return "end_time" in h5item
+    else:
+        return False
+
+
+@retry_contextmanager()
+def open_item(filename, name, retry_invalid=False, validate=None):
+    """Yield an HDF5 dataset or group (retry until it can be instantiated).
+
+    :param str filename:
+    :param bool retry_invalid: retry when item is missing or not valid
+    :param callable or None validate:
+    :yields Dataset, Group or None:
+    """
+    with File(filename) as h5file:
+        try:
+            item = h5file[name]
+        except KeyError as e:
+            if "doesn't exist" in str(e):
+                if retry_invalid:
+                    raise retry_mod.RetryError
+                else:
+                    item = None
+            else:
+                raise
+        if callable(validate) and item is not None:
+            if not validate(item):
+                if retry_invalid:
+                    raise retry_mod.RetryError
+                else:
+                    item = None
+        yield item
+
+
+def _top_level_names(filename, include_only=group_has_end_time):
+    """Return all valid top-level HDF5 names.
+
+    :param str filename:
+    :param callable or None include_only:
+    :returns list(str):
+    """
+    with File(filename) as h5file:
+        try:
+            if callable(include_only):
+                return [name for name in h5file["/"] if include_only(h5file[name])]
+            else:
+                return list(h5file["/"])
+        except KeyError:
+            raise retry_mod.RetryError
+
+
+top_level_names = retry()(_top_level_names)
+safe_top_level_names = retry_in_subprocess()(_top_level_names)
+
+
+class File(h5py.File):
+    """Takes care of HDF5 file locking and SWMR mode without the need
+    to handle those explicitly.
+
+    When using this class, you cannot open different files simultaneously
+    with different modes because the locking flag is an environment variable.
+    """
+
+    _HDF5_FILE_LOCKING = None
+    _NOPEN = 0
+    _SWMR_LIBVER = "latest"
+
+    def __init__(
+        self,
+        filename,
+        mode=None,
+        enable_file_locking=None,
+        swmr=None,
+        libver=None,
+        **kwargs
+    ):
+        """The arguments `enable_file_locking` and `swmr` should not be
+        specified explicitly for normal use cases.
+
+        :param str filename:
+        :param str or None mode: read-only by default
+        :param bool or None enable_file_locking: by default it is disabled for `mode='r'`
+                                                 and `swmr=False` and enabled for all
+                                                 other modes.
+        :param bool or None swmr: try both modes when `mode='r'` and `swmr=None`
+        :param **kwargs: see `h5py.File.__init__`
+        """
+        if mode is None:
+            mode = "r"
+        elif mode not in ("r", "w", "w-", "x", "a", "r+"):
+            raise ValueError("invalid mode {}".format(mode))
+        if not HAS_SWMR:
+            swmr = False
+
+        if enable_file_locking is None:
+            enable_file_locking = bool(mode != "r" or swmr)
+        if self._NOPEN:
+            self._check_locking_env(enable_file_locking)
+        else:
+            self._set_locking_env(enable_file_locking)
+
+        if swmr and libver is None:
+            libver = self._SWMR_LIBVER
+
+        if HAS_TRACK_ORDER:
+            kwargs.setdefault("track_order", True)
+        try:
+            super().__init__(filename, mode=mode, swmr=swmr, libver=libver, **kwargs)
+        except OSError as e:
+            #   wlock   wSWMR   rlock   rSWMR   OSError: Unable to open file (...)
+            # 1 TRUE    FALSE   FALSE   FALSE   -
+            # 2 TRUE    FALSE   FALSE   TRUE    -
+            # 3 TRUE    FALSE   TRUE    FALSE   unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'
+            # 4 TRUE    FALSE   TRUE    TRUE    unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'
+            # 5 TRUE    TRUE    FALSE   FALSE   file is already open for write (may use <h5clear file> to clear file consistency flags)
+            # 6 TRUE    TRUE    FALSE   TRUE    -
+            # 7 TRUE    TRUE    TRUE    FALSE   file is already open for write (may use <h5clear file> to clear file consistency flags)
+            # 8 TRUE    TRUE    TRUE    TRUE    -
+            if (
+                mode == "r"
+                and swmr is None
+                and "file is already open for write" in str(e)
+            ):
+                # Try reading in SWMR mode (situation 5 and 7)
+                swmr = True
+                if libver is None:
+                    libver = self._SWMR_LIBVER
+                super().__init__(
+                    filename, mode=mode, swmr=swmr, libver=libver, **kwargs
+                )
+            else:
+                raise
+        else:
+            self._add_nopen(1)
+            try:
+                if mode != "r" and swmr:
+                    # Try setting writer in SWMR mode
+                    self.swmr_mode = True
+            except Exception:
+                self.close()
+                raise
+
+    @classmethod
+    def _add_nopen(cls, v):
+        cls._NOPEN = max(cls._NOPEN + v, 0)
+
+    def close(self):
+        super().close()
+        self._add_nopen(-1)
+        if not self._NOPEN:
+            self._restore_locking_env()
+
+    def _set_locking_env(self, enable):
+        self._backup_locking_env()
+        if enable:
+            os.environ["HDF5_USE_FILE_LOCKING"] = "TRUE"
+        elif enable is None:
+            try:
+                del os.environ["HDF5_USE_FILE_LOCKING"]
+            except KeyError:
+                pass
+        else:
+            os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
+
+    def _get_locking_env(self):
+        v = os.environ.get("HDF5_USE_FILE_LOCKING")
+        if v == "TRUE":
+            return True
+        elif v is None:
+            return None
+        else:
+            return False
+
+    def _check_locking_env(self, enable):
+        if enable != self._get_locking_env():
+            if enable:
+                raise RuntimeError(
+                    "Close all HDF5 files before enabling HDF5 file locking"
+                )
+            else:
+                raise RuntimeError(
+                    "Close all HDF5 files before disabling HDF5 file locking"
+                )
+
+    def _backup_locking_env(self):
+        v = os.environ.get("HDF5_USE_FILE_LOCKING")
+        if v is None:
+            self._HDF5_FILE_LOCKING = None
+        else:
+            self._HDF5_FILE_LOCKING = v == "TRUE"
+
+    def _restore_locking_env(self):
+        self._set_locking_env(self._HDF5_FILE_LOCKING)
+        self._HDF5_FILE_LOCKING = None
diff --git a/silx/io/spech5.py b/silx/io/spech5.py
index 79fd2e4..1eaec7c 100644
--- a/silx/io/spech5.py
+++ b/silx/io/spech5.py
@@ -1,6 +1,6 @@
 # coding: utf-8
 # /*##########################################################################
-# Copyright (C) 2016-2018 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2021 European Synchrotron Radiation Facility
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -194,7 +194,7 @@ import numpy
 import six
 
 from silx import version as silx_version
-from .specfile import SpecFile
+from .specfile import SpecFile, SfErrColNotFound
 from . import commonh5
 
 __authors__ = ["P. Knobel", "D. Naudet"]
@@ -670,6 +670,10 @@ class PositionersGroup(commonh5.Group, SpecH5Group):
     def __init__(self, parent, scan):
         commonh5.Group.__init__(self, name="positioners", parent=parent,
                                 attrs={"NX_class": to_h5py_utf8("NXcollection")})
+
+        dataset_info = []  # Store list of positioner's (name, value)
+        is_error = False   # True if error encountered
+
         for motor_name in scan.motor_names:
             safe_motor_name = motor_name.replace("/", "%")
             if motor_name in scan.labels and scan.data.shape[0] > 0:
@@ -678,10 +682,24 @@ class PositionersGroup(commonh5.Group, SpecH5Group):
             else:
                 # Take value from #P scan header.
                 # (may return float("inf") if #P line is missing from scan hdr)
-                motor_value = scan.motor_position_by_name(motor_name)
-            self.add_node(SpecH5NodeDataset(name=safe_motor_name,
-                                            data=motor_value,
-                                            parent=self))
+                try:
+                    motor_value = scan.motor_position_by_name(motor_name)
+                except SfErrColNotFound:
+                    is_error = True
+                    motor_value = float('inf')
+            dataset_info.append((safe_motor_name, motor_value))
+
+        if is_error:  # Filter-out scalar values
+            logger1.warning("Mismatching number of elements in #P and #O: Ignoring")
+            dataset_info = [
+                (name, value) for name, value in dataset_info
+                if not isinstance(value, float)]
+
+        for name, value in dataset_info:
+            self.add_node(SpecH5NodeDataset(
+                name=name,
+                data=value,
+                parent=self))
 
 
 class InstrumentMcaGroup(commonh5.Group, SpecH5Group):
diff --git a/silx/io/test/__init__.py b/silx/io/test/__init__.py
index a309ee9..68b6e9b 100644
--- a/silx/io/test/__init__.py
+++ b/silx/io/test/__init__.py
@@ -40,6 +40,7 @@ from .test_nxdata import suite as test_nxdata_suite
 from .test_commonh5 import suite as test_commonh5_suite
 from .test_rawh5 import suite as test_rawh5_suite
 from .test_url import suite as test_url_suite
+from .test_h5py_utils import suite as test_h5py_utils_suite
 
 
 def suite():
@@ -56,4 +57,5 @@ def suite():
     test_suite.addTest(test_commonh5_suite())
     test_suite.addTest(test_rawh5_suite())
     test_suite.addTest(test_url_suite())
+    test_suite.addTest(test_h5py_utils_suite())
     return test_suite
diff --git a/silx/io/test/test_dictdump.py b/silx/io/test/test_dictdump.py
index b99116b..93c9183 100644
--- a/silx/io/test/test_dictdump.py
+++ b/silx/io/test/test_dictdump.py
@@ -33,6 +33,7 @@ import os
 import tempfile
 import unittest
 import h5py
+from copy import deepcopy
 
 from collections import defaultdict
 
@@ -72,7 +73,63 @@ link_attrs["links"]["absolute_softlink"] = h5py.SoftLink("/links/group/dataset")
 link_attrs["links"]["external_link"] = h5py.ExternalLink(ext_filename, "/ext_group/dataset")
 
 
-class TestDictToH5(unittest.TestCase):
+class DictTestCase(unittest.TestCase):  # TestCase with a recursive dict comparison helper
+
+    def assertRecursiveEqual(self, expected, actual, nodes=tuple()):  # nodes: keys walked so far
+        err_msg = "\n\n Tree nodes: {}".format(nodes)
+        if isinstance(expected, dict):
+            self.assertTrue(isinstance(actual, dict), msg=err_msg)
+            self.assertEqual(
+                set(expected.keys()),
+                set(actual.keys()),
+                msg=err_msg
+            )
+            for k in actual:  # both key sets were just asserted equal
+                self.assertRecursiveEqual(
+                    expected[k],
+                    actual[k],
+                    nodes=nodes + (k,),
+                )
+            return
+        if isinstance(actual, numpy.ndarray):  # leaves: compare arrays as plain lists
+            actual = actual.tolist()
+        if isinstance(expected, numpy.ndarray):
+            expected = expected.tolist()
+
+        self.assertEqual(expected, actual, msg=err_msg)
+
+
+class H5DictTestCase(DictTestCase):  # adds normalization of (group, attr) tuple keys
+
+    def _dictRoundTripNormalize(self, treedict):
+        """Convert the dictionary in place, as expected from a round-trip
+        treedict -> dicttoh5 -> h5todict -> newtreedict
+        """
+        for key, value in list(treedict.items()):  # normalize nested dicts first
+            if isinstance(value, dict):
+                self._dictRoundTripNormalize(value)
+
+        # Move treedict[("group", "attr_name")]
+        #     to treedict["group"][("", "attr_name")]
+        for key, value in list(treedict.items()):  # list(): the dict is mutated in the loop
+            if not isinstance(key, tuple):
+                continue
+            # Put the attribute inside the group
+            grpname, attr = key
+            if not grpname:  # keys with an empty group name are left untouched
+                continue
+            group = treedict.setdefault(grpname, dict())  # creates the group dict if missing
+            if isinstance(group, dict):  # when the target is not a dict the key stays in place
+                del treedict[key]
+                group[("", attr)] = value
+
+    def dictRoundTripNormalize(self, treedict):  # returns a normalized deep copy
+        treedict2 = deepcopy(treedict)  # the caller's dictionary is not modified
+        self._dictRoundTripNormalize(treedict2)
+        return treedict2
+
+
+class TestDictToH5(H5DictTestCase):
     def setUp(self):
         self.tempdir = tempfile.mkdtemp()
         self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5")
@@ -110,14 +167,13 @@ class TestDictToH5(unittest.TestCase):
                 min(ddict["city attributes"]["Europe"]["France"]["Grenoble"]["coordinates"]),
                 5.7196)
 
-    def testH5Overwrite(self):
+    def testH5OverwriteDeprecatedApi(self):
         dd = ConfigDict({'t': True})
 
         dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a')
         dd = ConfigDict({'t': False})
-        with TestLogging(dictdump_logger, warning=1):
-            dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a',
-                     overwrite_data=False)
+        dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a',
+                 overwrite_data=False)
 
         res = h5todict(self.h5_fname)
         assert(res['t'] == True)
@@ -200,8 +256,7 @@ class TestDictToH5(unittest.TestCase):
             ("group", "attr"): 10,
         }
         with h5py.File(self.h5_fname, "w") as h5file:
-            with TestLogging(dictdump_logger, warning=1):
-                dictdump.dicttoh5(ddict, h5file)
+            dictdump.dicttoh5(ddict, h5file)
             self.assertEqual(h5file["group"].attrs['attr'], 10)
 
     def testFlatDict(self):
@@ -241,8 +296,223 @@ class TestDictToH5(unittest.TestCase):
             numpy.testing.assert_array_equal(h5py_read_dataset(h5file["darks"]["0"]),
                                              ddict['darks']['0'])
 
-
-class TestH5ToDict(unittest.TestCase):
+    def testOverwrite(self):  # dicttoh5 with update_mode None, "add", "modify" and "replace"
+        # Tree structure that will be tested
+        group1 = {
+            ("", "attr2"): "original2",
+            "dset1": 0,
+            "dset2": [0, 1],
+            ("dset1", "attr1"): "original1",
+            ("dset1", "attr2"): "original2",
+            ("dset2", "attr1"): "original1",
+            ("dset2", "attr2"): "original2",
+        }
+        group2 = {
+            "subgroup1": group1.copy(),
+            "subgroup2": group1.copy(),
+            ("subgroup1", "attr1"): "original1",
+            ("subgroup2", "attr1"): "original1"
+        }
+        group2.update(group1)  # group2 also holds group1's datasets and attributes directly
+        # initial HDF5 tree
+        otreedict = {
+            ('', 'attr1'): "original1",
+            ('', 'attr2'): "original2",
+            'group1': group1,
+            'group2': group2,
+            ('group1', 'attr1'): "original1",
+            ('group2', 'attr1'): "original1"
+        }
+        wtreedict = None  # dumped dictionary
+        etreedict = None  # expected HDF5 tree after dump
+
+        def reset_file():  # rewrite the file from otreedict (mode "w")
+            dicttoh5(
+                otreedict,
+                h5file=self.h5_fname,
+                mode="w",
+            )
+
+        def append_file(update_mode):  # dump the wtreedict bound at call time (mode "a")
+            dicttoh5(
+                wtreedict,
+                h5file=self.h5_fname,
+                mode="a",
+                update_mode=update_mode
+            )
+
+        def assert_file():  # compare the file content with the normalized etreedict
+            rtreedict = h5todict(
+                self.h5_fname,
+                include_attributes=True,
+                asarray=False
+            )
+            netreedict = self.dictRoundTripNormalize(etreedict)
+            try:
+                self.assertRecursiveEqual(netreedict, rtreedict)
+            except AssertionError:  # print the three trees, then re-raise
+                from pprint import pprint
+                print("\nDUMP:")
+                pprint(wtreedict)
+                print("\nEXPECTED:")
+                pprint(netreedict)
+                print("\nHDF5:")
+                pprint(rtreedict)
+                raise
+
+        def assert_append(update_mode):
+            append_file(update_mode)
+            assert_file()
+
+        # Test wrong arguments
+        with self.assertRaises(ValueError):
+            dicttoh5(
+                otreedict,
+                h5file=self.h5_fname,
+                mode="w",
+                update_mode="wrong-value"
+            )
+
+        # No writing
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        assert_file()
+
+        # Write identical dictionary
+        wtreedict = deepcopy(otreedict)
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add", "modify", "replace"]:
+            assert_append(update_mode)
+
+        # Write empty dictionary
+        wtreedict = dict()
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add", "modify", "replace"]:
+            assert_append(update_mode)
+
+        # Modified dataset
+        wtreedict = dict()
+        wtreedict["group2"] = dict()
+        wtreedict["group2"]["subgroup2"] = dict()
+        wtreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]}
+        wtreedict["group2"]["subgroup2"]["dset2"] = [10, 20]
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add"]:  # expected: file unchanged
+            assert_append(update_mode)
+
+        etreedict["group2"]["subgroup2"]["dset2"] = [10, 20]
+        assert_append("modify")
+
+        etreedict["group2"] = dict()
+        del etreedict[("group2", "attr1")]
+        etreedict["group2"]["subgroup2"] = dict()
+        etreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]}
+        etreedict["group2"]["subgroup2"]["dset2"] = [10, 20]
+        assert_append("replace")
+
+        # Modified group
+        wtreedict = dict()
+        wtreedict["group2"] = dict()
+        wtreedict["group2"]["subgroup2"] = [0, 1]
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add", "modify"]:  # expected: file unchanged
+            assert_append(update_mode)
+
+        etreedict["group2"] = dict()
+        del etreedict[("group2", "attr1")]
+        etreedict["group2"]["subgroup2"] = [0, 1]
+        assert_append("replace")
+
+        # Modified attribute
+        wtreedict = dict()
+        wtreedict["group2"] = dict()
+        wtreedict["group2"]["subgroup2"] = dict()
+        wtreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified"
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add"]:  # expected: file unchanged
+            assert_append(update_mode)
+
+        etreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified"
+        assert_append("modify")
+
+        etreedict["group2"] = dict()
+        del etreedict[("group2", "attr1")]
+        etreedict["group2"]["subgroup2"] = dict()
+        etreedict["group2"]["subgroup2"]["dset1"] = dict()
+        etreedict["group2"]["subgroup2"]["dset1"][("", "attr1")] = "modified"
+        assert_append("replace")
+
+        # Delete group
+        wtreedict = dict()
+        wtreedict["group2"] = dict()
+        wtreedict["group2"]["subgroup2"] = None  # None is the deletion marker in these cases
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add"]:  # expected: file unchanged
+            assert_append(update_mode)
+
+        del etreedict["group2"]["subgroup2"]
+        del etreedict["group2"][("subgroup2", "attr1")]
+        assert_append("modify")
+
+        etreedict["group2"] = dict()
+        del etreedict[("group2", "attr1")]
+        assert_append("replace")
+
+        # Delete dataset
+        wtreedict = dict()
+        wtreedict["group2"] = dict()
+        wtreedict["group2"]["subgroup2"] = dict()
+        wtreedict["group2"]["subgroup2"]["dset2"] = None
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add"]:  # expected: file unchanged
+            assert_append(update_mode)
+
+        del etreedict["group2"]["subgroup2"]["dset2"]
+        del etreedict["group2"]["subgroup2"][("dset2", "attr1")]
+        del etreedict["group2"]["subgroup2"][("dset2", "attr2")]
+        assert_append("modify")
+
+        etreedict["group2"] = dict()
+        del etreedict[("group2", "attr1")]
+        etreedict["group2"]["subgroup2"] = dict()
+        assert_append("replace")
+
+        # Delete attribute
+        wtreedict = dict()
+        wtreedict["group2"] = dict()
+        wtreedict["group2"]["subgroup2"] = dict()
+        wtreedict["group2"]["subgroup2"][("dset2", "attr1")] = None
+
+        reset_file()
+        etreedict = deepcopy(otreedict)
+        for update_mode in [None, "add"]:  # expected: file unchanged
+            assert_append(update_mode)
+
+        del etreedict["group2"]["subgroup2"][("dset2", "attr1")]
+        assert_append("modify")
+
+        etreedict["group2"] = dict()
+        del etreedict[("group2", "attr1")]
+        etreedict["group2"]["subgroup2"] = dict()
+        etreedict["group2"]["subgroup2"]["dset2"] = dict()
+        assert_append("replace")
+
+
+class TestH5ToDict(H5DictTestCase):
     def setUp(self):
         self.tempdir = tempfile.mkdtemp()
         self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5")
@@ -313,7 +583,7 @@ class TestH5ToDict(unittest.TestCase):
         numpy.testing.assert_array_equal(ddict[("", "attr_2utf8")], adict[("", "attr_2utf8")])
 
 
-class TestDictToNx(unittest.TestCase):
+class TestDictToNx(H5DictTestCase):
     def setUp(self):
         self.tempdir = tempfile.mkdtemp()
         self.h5_fname = os.path.join(self.tempdir, "nx.h5")
@@ -416,8 +686,121 @@ class TestDictToNx(unittest.TestCase):
         with h5py.File(self.h5_fname, "r") as h5file:
             self.assertEqual(h5file["/links/group/subgroup/relative_softlink"][()], 10)
 
-
-class TestNxToDict(unittest.TestCase):
+    def testOverwrite(self):  # dicttonx update modes; appends accumulate in one file
+        entry_name = "entry"
+        wtreedict = {
+            "group1": {"a": 1, "b": 2},
+            "group2@attr3": "attr3",
+            "group2@attr4": "attr4",
+            "group2": {
+                "@attr1": "attr1",
+                "@attr2": "attr2",
+                "c": 3,
+                "d": 4,
+                "dataset4": 8,
+                "dataset4@units": "keV",
+            },
+            "group3": {"subgroup": {"e": 9, "f": 10}},
+            "dataset1": 5,
+            "dataset2": 6,
+            "dataset3": 7,
+            "dataset3@units": "mm",
+        }
+        esubtree = {  # expected content below the entry, including NX_class attributes
+            "@NX_class": "NXentry",
+            "group1": {"@NX_class": "NXcollection", "a": 1, "b": 2},
+            "group2": {
+                "@NX_class": "NXcollection",
+                "@attr1": "attr1",
+                "@attr2": "attr2",
+                "@attr3": "attr3",
+                "@attr4": "attr4",
+                "c": 3,
+                "d": 4,
+                "dataset4": 8,
+                "dataset4@units": "keV",
+            },
+            "group3": {
+                "@NX_class": "NXcollection",
+                "subgroup": {"@NX_class": "NXcollection", "e": 9, "f": 10},
+            },
+            "dataset1": 5,
+            "dataset2": 6,
+            "dataset3": 7,
+            "dataset3@units": "mm",
+        }
+        etreedict = {entry_name: esubtree}  # same esubtree object: later edits show up here
+
+        def append_file(update_mode, add_nx_class):  # dump wtreedict under h5path=entry_name
+            dictdump.dicttonx(
+                wtreedict,
+                h5file=self.h5_fname,
+                mode="a",
+                h5path=entry_name,
+                update_mode=update_mode,
+                add_nx_class=add_nx_class
+            )
+
+        def assert_file():  # compare the file content with the normalized etreedict
+            rtreedict = dictdump.nxtodict(
+                self.h5_fname,
+                include_attributes=True,
+                asarray=False,
+            )
+            netreedict = self.dictRoundTripNormalize(etreedict)
+            try:
+                self.assertRecursiveEqual(netreedict, rtreedict)
+            except AssertionError:  # print the three trees, then re-raise
+                from pprint import pprint
+                print("\nDUMP:")
+                pprint(wtreedict)
+                print("\nEXPECTED:")
+                pprint(netreedict)
+                print("\nHDF5:")
+                pprint(rtreedict)
+                raise
+
+        def assert_append(update_mode, add_nx_class=None):
+            append_file(update_mode, add_nx_class=add_nx_class)
+            assert_file()
+
+        # First to an empty file
+        assert_append(None)
+
+        # Add non-existing attributes/datasets/groups
+        wtreedict["group1"].pop("a")
+        wtreedict["group2"].pop("@attr1")
+        wtreedict["group2"]["@attr2"] = "attr3"  # only for update
+        wtreedict["group2"]["@type"] = "test"
+        wtreedict["group2"]["dataset4"] = 9  # only for update
+        del wtreedict["group2"]["dataset4@units"]
+        wtreedict["group3"] = {}
+        esubtree["group2"]["@type"] = "test"  # the only change expected from "add"
+        assert_append("add")
+
+        # Update existing attributes and datasets
+        esubtree["group2"]["@attr2"] = "attr3"
+        esubtree["group2"]["dataset4"] = 9
+        assert_append("modify")
+
+        # Do not add missing NX_class by default when updating
+        wtreedict["group2"]["@NX_class"] = "NXprocess"
+        esubtree["group2"]["@NX_class"] = "NXprocess"
+        assert_append("modify")
+        del wtreedict["group2"]["@NX_class"]
+        assert_append("modify")  # expected tree unchanged: NX_class stays "NXprocess"
+
+        # Overwrite existing groups/datasets/attributes
+        esubtree["group1"].pop("a")
+        esubtree["group2"].pop("@attr1")
+        esubtree["group2"]["@NX_class"] = "NXcollection"
+        esubtree["group2"]["dataset4"] = 9
+        del esubtree["group2"]["dataset4@units"]
+        esubtree["group3"] = {"@NX_class": "NXcollection"}
+        assert_append("replace", add_nx_class=True)
+
+
+class TestNxToDict(H5DictTestCase):
     def setUp(self):
         self.tempdir = tempfile.mkdtemp()
         self.h5_fname = os.path.join(self.tempdir, "nx.h5")
@@ -510,7 +893,7 @@ class TestNxToDict(unittest.TestCase):
             h5todict(self.h5_fname, path="/Mars", errors='raise')
 
 
-class TestDictToJson(unittest.TestCase):
+class TestDictToJson(DictTestCase):
     def setUp(self):
         self.dir_path = tempfile.mkdtemp()
         self.json_fname = os.path.join(self.dir_path, "cityattrs.json")
@@ -528,7 +911,7 @@ class TestDictToJson(unittest.TestCase):
             self.assertIn('"inhabitants": 160215', json_content)
 
 
-class TestDictToIni(unittest.TestCase):
+class TestDictToIni(DictTestCase):
     def setUp(self):
         self.dir_path = tempfile.mkdtemp()
         self.ini_fname = os.path.join(self.dir_path, "test.ini")
diff --git a/silx/io/test/test_h5py_utils.py b/silx/io/test/test_h5py_utils.py
new file mode 100644
index 0000000..2e2e3dd
--- /dev/null
+++ b/silx/io/test/test_h5py_utils.py
@@ -0,0 +1,397 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""Tests for h5py utilities"""
+
+__authors__ = ["W. de Nolf"]
+__license__ = "MIT"
+__date__ = "27/01/2020"
+
+
+import unittest
+import os
+import sys
+import time
+import shutil
+import tempfile
+import threading
+import multiprocessing
+from contextlib import contextmanager
+
+from .. import h5py_utils
+from ...utils.retry import RetryError, RetryTimeoutError
+
+IS_WINDOWS = sys.platform == "win32"
+
+
+def _subprocess_context_main(queue, contextmgr, *args, **kw):  # target of the child process
+    try:
+        with contextmgr(*args, **kw):
+            queue.put(None)  # tell the parent that the context has been entered
+            threading.Event().wait()  # fresh Event is never set: block until the process ends
+    except Exception:
+        queue.put(None)  # unblock the parent's queue.get() on failure as well
+        raise
+
+
+@contextmanager
+def _subprocess_context(contextmgr, *args, **kw):  # hold `contextmgr` open in a child process
+    timeout = kw.pop("timeout", 10)  # used for both the start-up wait and the final join
+    queue = multiprocessing.Queue(maxsize=1)
+    p = multiprocessing.Process(
+        target=_subprocess_context_main, args=(queue, contextmgr) + args, kwargs=kw
+    )
+    p.start()
+    try:
+        queue.get(timeout=timeout)  # wait for the child's signal
+        yield
+    finally:
+        try:
+            p.kill()
+        except AttributeError:  # Process objects without kill() (added in Python 3.7)
+            p.terminate()
+        p.join(timeout)
+
+
+@contextmanager
+def _open_context(filename, **kw):  # open with h5py_utils.File; kw is forwarded unchanged
+    with h5py_utils.File(filename, **kw) as f:
+        if kw.get("mode") == "w":  # only files opened in mode "w" get the marker dataset
+            f["check"] = True
+            f.flush()
+        yield f
+
+
+def _cause_segfault():  # crash the current process on purpose (see the function name)
+    import ctypes
+
+    i = ctypes.c_char(b"a")  # a single-char buffer
+    j = ctypes.pointer(i)
+    c = 0
+    while True:  # no exit condition: keeps writing at ever larger offsets from that buffer
+        j[c] = b"a"
+        c += 1
+
+
+def _top_level_names_test(txtfilename, *args, **kw):  # fails `ncausefailure` times, then works
+    sys.stderr = open(os.devnull, "w")  # discard anything written to sys.stderr from here on
+
+    with open(txtfilename, mode="r") as f:  # the failure counter is kept in a text file
+        failcounter = int(f.readline().strip())
+
+    ncausefailure = kw.pop("ncausefailure")  # popped so they are not forwarded below
+    faildelay = kw.pop("faildelay")
+    if failcounter < ncausefailure:
+        time.sleep(faildelay)
+        failcounter += 1
+        with open(txtfilename, mode="w") as f:  # persist the counter before failing
+            f.write(str(failcounter))
+        if failcounter % 2:  # odd attempts raise RetryError, even attempts crash
+            raise RetryError
+        else:
+            _cause_segfault()
+    return h5py_utils._top_level_names(*args, **kw)
+
+
+top_level_names_test = h5py_utils.retry_in_subprocess()(_top_level_names_test)  # wrapped version
+
+
+def subtests(test):  # decorator: run `test` once per step of self._subtests()
+    def wrapper(self):
+        for _ in self._subtests():  # each step is expected to set self._subtest_options
+            with self.subTest(**self._subtest_options):
+                test(self)
+
+    return wrapper
+
+
+class TestH5pyUtils(unittest.TestCase):  # tests h5py_utils file opening and retry helpers
+    def setUp(self):
+        self.test_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.test_dir)
+
+    def _subtests(self):  # two option sets; each resets the filename generator
+        self._subtest_options = {"mode": "w"}
+        self.filename_generator = self._filenames()
+        yield
+        self._subtest_options = {"mode": "w", "libver": "latest"}
+        self.filename_generator = self._filenames()
+        yield
+
+    @property
+    def _liber_allows_concurrent_access(self):  # True if libver is unset, "earliest" or "v18"
+        return self._subtest_options.get("libver") in [None, "earliest", "v18"]
+
+    def _filenames(self):  # endless generator: file1.h5, file2.h5, ... in test_dir
+        i = 1
+        while True:
+            filename = os.path.join(self.test_dir, "file{}.h5".format(i))
+            with self._open_context(filename):  # opened with the current options before use
+                pass
+            yield filename
+            i += 1
+
+    def _new_filename(self):
+        return next(self.filename_generator)
+
+    @contextmanager
+    def _open_context(self, filename, **kwargs):
+        kw = self._subtest_options  # NOTE: alias, not a copy
+        kw.update(kwargs)  # so kwargs persist in self._subtest_options for later calls
+        with _open_context(filename, **kw) as f:
+
+            yield f
+
+    @contextmanager
+    def _open_context_subprocess(self, filename, **kwargs):
+        kw = self._subtest_options  # NOTE: same aliasing as in _open_context
+        kw.update(kwargs)
+        with _subprocess_context(_open_context, filename, **kw):
+            yield
+
+    def _assert_hdf5_data(self, f):  # the "check" dataset must read back as true
+        self.assertTrue(f["check"][()])
+
+    def _validate_hdf5_data(self, filename, swmr=False):
+        with self._open_context(filename, mode="r") as f:
+            self.assertEqual(f.swmr_mode, swmr)
+            self._assert_hdf5_data(f)
+
+    @subtests
+    def test_modes_single_process(self):
+        orig = os.environ.get("HDF5_USE_FILE_LOCKING")  # must be unchanged after each step
+        filename1 = self._new_filename()
+        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+        filename2 = self._new_filename()
+        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+        with self._open_context(filename1, mode="r"):
+            with self._open_context(filename2, mode="r"):
+                pass
+            for mode in ["w", "a"]:  # writing while a reader is open must raise
+                with self.assertRaises(RuntimeError):
+                    with self._open_context(filename2, mode=mode):
+                        pass
+        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+        with self._open_context(filename1, mode="a"):
+            for mode in ["w", "a"]:
+                with self._open_context(filename2, mode=mode):
+                    pass
+            with self.assertRaises(RuntimeError):  # reading while a writer is open must raise
+                with self._open_context(filename2, mode="r"):
+                    pass
+        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
+
+    @subtests
+    def test_modes_multi_process(self):
+        if not self._liber_allows_concurrent_access:
+            # A concurrent reader with HDF5_USE_FILE_LOCKING=FALSE
+            # no longer works with HDF5 >=1.10 (you get an exception
+            # when trying to open the file)
+            return
+        filename = self._new_filename()
+
+        # File open by truncating writer
+        with self._open_context_subprocess(filename, mode="w"):
+            with self._open_context(filename, mode="r") as f:
+                self._assert_hdf5_data(f)
+            if IS_WINDOWS:
+                with self._open_context(filename, mode="a") as f:
+                    self._assert_hdf5_data(f)
+            else:
+                with self.assertRaises(OSError):
+                    with self._open_context(filename, mode="a") as f:
+                        pass
+            self._validate_hdf5_data(filename)
+
+        # File open by appending writer
+        with self._open_context_subprocess(filename, mode="a"):
+            with self._open_context(filename, mode="r") as f:
+                self._assert_hdf5_data(f)
+            if IS_WINDOWS:
+                with self._open_context(filename, mode="a") as f:
+                    self._assert_hdf5_data(f)
+            else:
+                with self.assertRaises(OSError):
+                    with self._open_context(filename, mode="a") as f:
+                        pass
+            self._validate_hdf5_data(filename)
+
+        # File open by reader
+        with self._open_context_subprocess(filename, mode="r"):
+            with self._open_context(filename, mode="r") as f:
+                self._assert_hdf5_data(f)
+            with self._open_context(filename, mode="a") as f:
+                pass
+            self._validate_hdf5_data(filename)
+
+        # File open by locking reader
+        with _subprocess_context(
+            _open_context, filename, mode="r", enable_file_locking=True
+        ):
+            with self._open_context(filename, mode="r") as f:
+                self._assert_hdf5_data(f)
+            if IS_WINDOWS:
+                with self._open_context(filename, mode="a") as f:
+                    self._assert_hdf5_data(f)
+            else:
+                with self.assertRaises(OSError):
+                    with self._open_context(filename, mode="a") as f:
+                        pass
+            self._validate_hdf5_data(filename)
+
+    @subtests
+    @unittest.skipIf(not h5py_utils.HAS_SWMR, "SWMR not supported")
+    def test_modes_multi_process_swmr(self):
+        filename = self._new_filename()
+
+        with self._open_context(filename, mode="w", libver="latest") as f:
+            pass
+
+        # File open by SWMR writer
+        with self._open_context_subprocess(filename, mode="a", swmr=True):
+            with self._open_context(filename, mode="r") as f:
+                assert f.swmr_mode
+                self._assert_hdf5_data(f)
+            with self.assertRaises(OSError):
+                with self._open_context(filename, mode="a") as f:
+                    pass
+            self._validate_hdf5_data(filename, swmr=True)
+
+    @subtests
+    def test_retry_defaults(self):
+        filename = self._new_filename()
+
+        names = h5py_utils.top_level_names(filename)
+        self.assertEqual(names, [])
+
+        names = h5py_utils.safe_top_level_names(filename)
+        self.assertEqual(names, [])
+
+        names = h5py_utils.top_level_names(filename, include_only=None)
+        self.assertEqual(names, ["check"])
+
+        names = h5py_utils.safe_top_level_names(filename, include_only=None)
+        self.assertEqual(names, ["check"])
+
+        with h5py_utils.open_item(filename, "/check", validate=lambda x: False) as item:
+            self.assertEqual(item, None)
+
+        with h5py_utils.open_item(filename, "/check", validate=None) as item:
+            self.assertTrue(item[()])
+
+        with self.assertRaises(RetryTimeoutError):
+            with h5py_utils.open_item(
+                filename,
+                "/check",
+                retry_timeout=0.1,
+                retry_invalid=True,
+                validate=lambda x: False,
+            ) as item:
+                pass
+
+        ncall = 0
+
+        def validate(item):  # raises RetryError on the first call, returns True afterwards
+            nonlocal ncall
+            if ncall >= 1:
+                return True
+            else:
+                ncall += 1
+                raise RetryError
+
+        with h5py_utils.open_item(
+            filename, "/check", validate=validate, retry_timeout=1, retry_invalid=True
+        ) as item:
+            self.assertTrue(item[()])
+
+    @subtests
+    def test_retry_custom(self):
+        filename = self._new_filename()
+        ncausefailure = 3
+        faildelay = 0.1
+        sufficient_timeout = ncausefailure * (faildelay + 10)
+        insufficient_timeout = ncausefailure * faildelay * 0.5  # half the total fail delay
+
+        @h5py_utils.retry_contextmanager()
+        def open_item(filename, name):  # raises RetryError for the first `ncausefailure` calls
+            nonlocal failcounter
+            if failcounter < ncausefailure:
+                time.sleep(faildelay)
+                failcounter += 1
+                raise RetryError
+            with h5py_utils.File(filename) as h5file:
+                yield h5file[name]
+
+        failcounter = 0  # reset before each scenario
+        kw = {"retry_timeout": sufficient_timeout}
+        with open_item(filename, "/check", **kw) as item:
+            self.assertTrue(item[()])
+
+        failcounter = 0
+        kw = {"retry_timeout": insufficient_timeout}
+        with self.assertRaises(RetryTimeoutError):
+            with open_item(filename, "/check", **kw) as item:
+                pass
+
+    @subtests
+    def test_retry_in_subprocess(self):
+        filename = self._new_filename()
+        txtfilename = os.path.join(self.test_dir, "failcounter.txt")
+        ncausefailure = 3
+        faildelay = 0.1
+        sufficient_timeout = ncausefailure * (faildelay + 10)
+        insufficient_timeout = ncausefailure * faildelay * 0.5  # half the total fail delay
+
+        kw = {
+            "retry_timeout": sufficient_timeout,
+            "include_only": None,
+            "ncausefailure": ncausefailure,
+            "faildelay": faildelay,
+        }
+        with open(txtfilename, mode="w") as f:  # reset the failure counter file
+            f.write("0")
+        names = top_level_names_test(txtfilename, filename, **kw)
+        self.assertEqual(names, ["check"])
+
+        kw = {
+            "retry_timeout": insufficient_timeout,
+            "include_only": None,
+            "ncausefailure": ncausefailure,
+            "faildelay": faildelay,
+        }
+        with open(txtfilename, mode="w") as f:  # reset the failure counter file
+            f.write("0")
+        with self.assertRaises(RetryTimeoutError):
+            top_level_names_test(txtfilename, filename, **kw)
+
+
+def suite():  # build a TestSuite holding all TestH5pyUtils tests
+    test_suite = unittest.TestSuite()
+    test_suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestH5pyUtils))
+    return test_suite
+
+
+if __name__ == "__main__":  # direct execution runs the tests returned by suite()
+    unittest.main(defaultTest="suite")
diff --git a/silx/io/url.py b/silx/io/url.py
index 044977c..66b75f0 100644
--- a/silx/io/url.py
+++ b/silx/io/url.py
@@ -344,6 +344,8 @@ class DataUrl(object):
         :rtype: bool
         """
         file_path = self.file_path()
+        if file_path is None:
+            return False
         if len(file_path) > 0:
             if file_path[0] == "/":
                 return True
author	Picca Frédéric-Emmanuel <picca@debian.org>	2021-09-07 14:39:36 +0200
committer	Picca Frédéric-Emmanuel <picca@debian.org>	2021-09-07 14:39:36 +0200
commit	d3194b1a9c4404ba93afac43d97172ab24c57098 (patch)
tree	a1604130e1401dc1cbd084518ed72869dc92b86f /silx/io
parent	b3bea947efa55d2c0f198b6c6795b3177be27f45 (diff)