diff options
author | Picca Frédéric-Emmanuel <picca@debian.org> | 2021-09-07 14:39:36 +0200 |
---|---|---|
committer | Picca Frédéric-Emmanuel <picca@debian.org> | 2021-09-07 14:39:36 +0200 |
commit | d3194b1a9c4404ba93afac43d97172ab24c57098 (patch) | |
tree | a1604130e1401dc1cbd084518ed72869dc92b86f /silx/io | |
parent | b3bea947efa55d2c0f198b6c6795b3177be27f45 (diff) |
New upstream version 0.15.2+dfsg
Diffstat (limited to 'silx/io')
-rw-r--r-- | silx/io/dictdump.py | 303 | ||||
-rw-r--r-- | silx/io/h5py_utils.py | 317 | ||||
-rw-r--r-- | silx/io/spech5.py | 30 | ||||
-rw-r--r-- | silx/io/test/__init__.py | 2 | ||||
-rw-r--r-- | silx/io/test/test_dictdump.py | 411 | ||||
-rw-r--r-- | silx/io/test/test_h5py_utils.py | 397 | ||||
-rw-r--r-- | silx/io/url.py | 2 |
7 files changed, 1367 insertions, 95 deletions
diff --git a/silx/io/dictdump.py b/silx/io/dictdump.py index bbb244a..e907668 100644 --- a/silx/io/dictdump.py +++ b/silx/io/dictdump.py @@ -26,6 +26,7 @@ by text strings to following file formats: `HDF5, INI, JSON` """ from collections import OrderedDict +from collections.abc import Mapping import json import logging import numpy @@ -34,11 +35,16 @@ import sys import h5py from .configdict import ConfigDict -from .utils import is_group, is_link, is_softlink, is_externallink +from .utils import is_group +from .utils import is_dataset +from .utils import is_link +from .utils import is_softlink +from .utils import is_externallink from .utils import is_file as is_h5_file_like from .utils import open as h5open from .utils import h5py_read_dataset from .utils import H5pyAttributesReadWrapper +from silx.utils.deprecation import deprecated_warning __authors__ = ["P. Knobel"] __license__ = "MIT" @@ -66,7 +72,7 @@ def _prepare_hdf5_write_value(array_like): return array -class _SafeH5FileWrite(object): +class _SafeH5FileWrite: """Context manager returning a :class:`h5py.File` object. If this object is initialized with a file path, we open the file @@ -82,7 +88,6 @@ class _SafeH5FileWrite(object): """ def __init__(self, h5file, mode="w"): """ - :param h5file: HDF5 file path or :class:`h5py.File` instance :param str mode: Can be ``"r+"`` (read/write, file must exist), ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if @@ -106,7 +111,7 @@ class _SafeH5FileWrite(object): self.h5file.close() -class _SafeH5FileRead(object): +class _SafeH5FileRead: """Context manager returning a :class:`h5py.File` or a :class:`silx.io.spech5.SpecH5` or a :class:`silx.io.fabioh5.File` object. 
@@ -136,18 +141,48 @@ class _SafeH5FileRead(object): self.h5file.close() +def _normalize_h5_path(h5root, h5path): + """ + :param h5root: File name or h5py-like File, Group or Dataset + :param str h5path: relative to ``h5root`` + :returns 2-tuple: (File or file object, h5path) + """ + if is_group(h5root): + group_name = h5root.name + if group_name == "/": + pass + elif h5path: + h5path = group_name + "/" + h5path + else: + h5path = group_name + h5file = h5root.file + elif is_dataset(h5root): + h5path = h5root.name + h5file = h5root.file + else: + h5file = h5root + if not h5path: + h5path = "/" + elif not h5path.endswith("/"): + h5path += "/" + return h5file, h5path + + def dicttoh5(treedict, h5file, h5path='/', - mode="w", overwrite_data=False, - create_dataset_args=None): + mode="w", overwrite_data=None, + create_dataset_args=None, update_mode=None): """Write a nested dictionary to a HDF5 file, using keys as member names. If a dictionary value is a sub-dictionary, a group is created. If it is any other data type, it is cast into a numpy array and written as a :mod:`h5py` dataset. Dictionary keys must be strings and cannot contain the ``/`` character. - + If dictionary keys are tuples they are interpreted to set h5 attributes. - The tuples should have the format (dataset_name,attr_name) + The tuples should have the format (dataset_name, attr_name). + + Existing HDF5 items can be deleted by providing the dictionary value + ``None``, provided that ``update_mode in ["modify", "replace"]``. .. note:: @@ -158,21 +193,29 @@ def dicttoh5(treedict, h5file, h5path='/', to define sub trees. If tuples are used as keys they should have the format (dataset_name,attr_name) and will add a 5h attribute with the corresponding value. - :param h5file: HDF5 file name or handle. If a file name is provided, the - function opens the file in the specified mode and closes it again - before completing. - :param h5path: Target path in HDF5 file in which scan groups are created. 
+ :param h5file: File name or h5py-like File, Group or Dataset + :param h5path: Target path in the HDF5 file relative to ``h5file``. Default is root (``"/"``) :param mode: Can be ``"r+"`` (read/write, file must exist), ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if exists) or ``"a"`` (read/write if exists, create otherwise). This parameter is ignored if ``h5file`` is a file handle. - :param overwrite_data: If ``True``, existing groups and datasets can be - overwritten, if ``False`` they are skipped. This parameter is only - relevant if ``h5file_mode`` is ``"r+"`` or ``"a"``. + :param overwrite_data: Deprecated. ``True`` is approximately equivalent + to ``update_mode="modify"`` and ``False`` is equivalent to + ``update_mode="add"``. :param create_dataset_args: Dictionary of args you want to pass to ``h5f.create_dataset``. This allows you to specify filters and compression parameters. Don't specify ``name`` and ``data``. + :param update_mode: Can be ``add`` (default), ``modify`` or ``replace``. + + * ``add``: Extend the existing HDF5 tree when possible. Existing HDF5 + items (groups, datasets and attributes) remain untouched. + * ``modify``: Extend the existing HDF5 tree when possible, modify + existing attributes, modify same-sized dataset values and delete + HDF5 items with a ``None`` value in the dict tree. + * ``replace``: Replace the existing HDF5 tree. Items from the root of + the HDF5 tree that are not present in the root of the dict tree + will remain untouched. 
Example:: @@ -201,44 +244,110 @@ def dicttoh5(treedict, h5file, h5path='/', create_dataset_args=create_ds_args) """ - if not h5path.endswith("/"): - h5path += "/" + if overwrite_data is not None: + reason = ( + "`overwrite_data=True` becomes `update_mode='modify'` and " + "`overwrite_data=False` becomes `update_mode='add'`" + ) + deprecated_warning( + type_="argument", + name="overwrite_data", + reason=reason, + replacement="update_mode", + since_version="0.15", + ) + + if update_mode is None: + if overwrite_data: + update_mode = "modify" + else: + update_mode = "add" + else: + valid_existing_values = ("add", "replace", "modify") + if update_mode not in valid_existing_values: + raise ValueError(( + "Argument 'update_mode' can only have values: {}" + "".format(valid_existing_values) + )) + if overwrite_data is not None: + logger.warning("The argument `overwrite_data` is ignored") - with _SafeH5FileWrite(h5file, mode=mode) as h5f: - if isinstance(treedict, dict) and h5path != "/": - if h5path not in h5f: - h5f.create_group(h5path) + if not isinstance(treedict, Mapping): + raise TypeError("'treedict' must be a dictionary") - for key in filter(lambda k: not isinstance(k, tuple), treedict): - key_is_group = isinstance(treedict[key], dict) - h5name = h5path + key + h5file, h5path = _normalize_h5_path(h5file, h5path) - if key_is_group and treedict[key]: - # non-empty group: recurse - dicttoh5(treedict[key], h5f, h5name, - overwrite_data=overwrite_data, - create_dataset_args=create_dataset_args) - continue + def _iter_treedict(attributes=False): + nonlocal treedict + for key, value in treedict.items(): + if isinstance(key, tuple) == attributes: + yield key, value - if h5name in h5f: - # key already exists: delete or skip - if overwrite_data is True: - del h5f[h5name] + change_allowed = update_mode in ("replace", "modify") + + with _SafeH5FileWrite(h5file, mode=mode) as h5f: + # Create the root of the tree + if h5path in h5f: + if not is_group(h5f[h5path]): + if update_mode 
== "replace": + del h5f[h5path] + h5f.create_group(h5path) else: - logger.warning('key (%s) already exists. ' - 'Not overwriting.' % (h5name)) - continue + return + else: + h5f.create_group(h5path) - value = treedict[key] + # Loop over all groups, links and datasets + for key, value in _iter_treedict(attributes=False): + h5name = h5path + key + exists = h5name in h5f - if value is None or key_is_group: - # Create empty group - h5f.create_group(h5name) + if value is None: + # Delete HDF5 item + if exists and change_allowed: + del h5f[h5name] + exists = False + elif isinstance(value, Mapping): + # HDF5 group + if exists and update_mode == "replace": + del h5f[h5name] + exists = False + if value: + dicttoh5(value, h5f, h5name, + update_mode=update_mode, + create_dataset_args=create_dataset_args) + elif not exists: + h5f.create_group(h5name) elif is_link(value): - h5f[h5name] = value + # HDF5 link + if exists and update_mode == "replace": + del h5f[h5name] + exists = False + if not exists: + # Create link from h5py link object + h5f[h5name] = value else: + # HDF5 dataset + if exists and not change_allowed: + continue data = _prepare_hdf5_write_value(value) - # can't apply filters on scalars (datasets with shape == () ) + + # Edit the existing dataset + attrs_backup = None + if exists: + try: + h5f[h5name][()] = data + continue + except Exception: + # Delete the existing dataset + if update_mode != "replace": + if not is_dataset(h5f[h5name]): + continue + attrs_backup = dict(h5f[h5name].attrs) + del h5f[h5name] + + # Create dataset + # can't apply filters on scalars (datasets with shape == ()) if data.shape == () or create_dataset_args is None: h5f.create_dataset(h5name, data=data) @@ -246,36 +355,58 @@ def dicttoh5(treedict, h5file, h5path='/', h5f.create_dataset(h5name, data=data, **create_dataset_args) + if attrs_backup: + h5f[h5name].attrs.update(attrs_backup) - # deal with h5 attributes which have tuples as keys in treedict - for key in filter(lambda k: 
isinstance(k, tuple), treedict): - assert len(key) == 2, "attribute must be defined by 2 values" + # Loop over all attributes + for key, value in _iter_treedict(attributes=True): + if len(key) != 2: + raise ValueError("HDF5 attribute must be described by 2 values") h5name = h5path + key[0] attr_name = key[1] if h5name not in h5f: - # Create empty group if key for attr does not exist + # Create an empty group to store the attribute h5f.create_group(h5name) - logger.warning( - "key (%s) does not exist. attr %s " - "will be written to ." % (h5name, attr_name) - ) - - if attr_name in h5f[h5name].attrs: - if not overwrite_data: - logger.warning( - "attribute %s@%s already exists. Not overwriting." - "" % (h5name, attr_name) - ) + + h5a = h5f[h5name].attrs + exists = attr_name in h5a + + if value is None: + # Delete HDF5 attribute + if exists and change_allowed: + del h5a[attr_name] + exists = False + else: + # Add/modify HDF5 attribute + if exists and not change_allowed: continue + data = _prepare_hdf5_write_value(value) + h5a[attr_name] = data - # Write attribute - value = treedict[key] - data = _prepare_hdf5_write_value(value) - h5f[h5name].attrs[attr_name] = data + +def _has_nx_class(treedict, key=""): + return key + "@NX_class" in treedict or \ + (key, "NX_class") in treedict + + +def _ensure_nx_class(treedict, parents=tuple()): + """Each group needs an "NX_class" attribute. 
+ """ + if _has_nx_class(treedict): + return + nparents = len(parents) + if nparents == 0: + treedict[("", "NX_class")] = "NXroot" + elif nparents == 1: + treedict[("", "NX_class")] = "NXentry" + else: + treedict[("", "NX_class")] = "NXcollection" -def nexus_to_h5_dict(treedict, parents=tuple()): +def nexus_to_h5_dict( + treedict, parents=tuple(), add_nx_class=True, has_nx_class=False +): """The following conversions are applied: * key with "{name}@{attr_name}" notation: key converted to 2-tuple * key with ">{url}" notation: strip ">" and convert value to @@ -286,14 +417,20 @@ def nexus_to_h5_dict(treedict, parents=tuple()): to define sub tree. The ``"@"`` character is used to write attributes. The ``">"`` prefix is used to define links. :param parents: Needed to resolve up-links (tuple of HDF5 group names) + :param add_nx_class: Add "NX_class" attribute when missing + :param has_nx_class: The "NX_class" attribute is defined in the parent :rtype dict: """ + if not isinstance(treedict, Mapping): + raise TypeError("'treedict' must be a dictionary") copy = dict() for key, value in treedict.items(): if "@" in key: + # HDF5 attribute key = tuple(key.rsplit("@", 1)) elif key.startswith(">"): + # HDF5 link if isinstance(value, str): key = key[1:] first, sep, second = value.partition("::") @@ -314,10 +451,19 @@ def nexus_to_h5_dict(treedict, parents=tuple()): value = h5py.SoftLink(first) elif is_link(value): key = key[1:] - if isinstance(value, dict): - copy[key] = nexus_to_h5_dict(value, parents=parents+(key,)) + if isinstance(value, Mapping): + # HDF5 group + key_has_nx_class = add_nx_class and _has_nx_class(treedict, key) + copy[key] = nexus_to_h5_dict( + value, + parents=parents+(key,), + add_nx_class=add_nx_class, + has_nx_class=key_has_nx_class) else: + # HDF5 dataset or link copy[key] = value + if add_nx_class and not has_nx_class: + _ensure_nx_class(copy, parents) return copy @@ -336,7 +482,8 @@ def h5_to_nexus_dict(treedict): copy = dict() for key, value in 
treedict.items(): if isinstance(key, tuple): - assert len(key)==2, "attribute must be defined by 2 values" + if len(key) != 2: + raise ValueError("HDF5 attribute must be described by 2 values") key = "%s@%s" % (key[0], key[1]) elif is_softlink(value): key = ">" + key @@ -344,7 +491,7 @@ def h5_to_nexus_dict(treedict): elif is_externallink(value): key = ">" + key value = value.filename + "::" + value.path - if isinstance(value, dict): + if isinstance(value, Mapping): copy[key] = h5_to_nexus_dict(value) else: copy[key] = value @@ -414,10 +561,8 @@ def h5todict(h5file, scalars). In some cases, you may find that a list of heterogeneous data types is converted to a numpy array of strings. - :param h5file: File name or :class:`h5py.File` object or spech5 file or - fabioh5 file. - :param str path: Name of HDF5 group to use as dictionary root level, - to read only a sub-group in the file + :param h5file: File name or h5py-like File, Group or Dataset + :param str path: Target path in the HDF5 file relative to ``h5file`` :param List[str] exclude_names: Groups and datasets whose name contains a string in this list will be ignored. Default is None (ignore nothing) :param bool asarray: True (default) to read scalar as arrays, False to @@ -431,6 +576,7 @@ def h5todict(h5file, - 'ignore': Ignore errors :return: Nested dictionary """ + h5file, path = _normalize_h5_path(h5file, path) with _SafeH5FileRead(h5file) as h5f: ddict = {} if path not in h5f: @@ -508,7 +654,7 @@ def h5todict(h5file, return ddict -def dicttonx(treedict, h5file, h5path="/", **kw): +def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw): """ Write a nested dictionary to a HDF5 file, using string keys as member names. The NeXus convention is used to identify attributes with ``"@"`` character, @@ -521,6 +667,8 @@ def dicttonx(treedict, h5file, h5path="/", **kw): and array-like objects as leafs. The ``"/"`` character can be used to define sub tree. The ``"@"`` character is used to write attributes. 
The ``">"`` prefix is used to define links. + :param add_nx_class: Add "NX_class" attribute when missing. By default it + is ``True`` when ``update_mode`` is ``"add"`` or ``None``. The named parameters are passed to dicttoh5. @@ -557,12 +705,17 @@ def dicttonx(treedict, h5file, h5path="/", **kw): dicttonx(gauss,"test.h5") """ + h5file, h5path = _normalize_h5_path(h5file, h5path) parents = tuple(p for p in h5path.split("/") if p) - nxtreedict = nexus_to_h5_dict(treedict, parents=parents) + if add_nx_class is None: + add_nx_class = kw.get("update_mode", None) in (None, "add") + nxtreedict = nexus_to_h5_dict( + treedict, parents=parents, add_nx_class=add_nx_class + ) dicttoh5(nxtreedict, h5file, h5path=h5path, **kw) -def nxtodict(h5file, **kw): +def nxtodict(h5file, include_attributes=True, **kw): """Read a HDF5 file and return a nested dictionary with the complete file structure and all data. @@ -571,7 +724,7 @@ def nxtodict(h5file, **kw): The named parameters are passed to h5todict. """ - nxtreedict = h5todict(h5file, **kw) + nxtreedict = h5todict(h5file, include_attributes=include_attributes, **kw) return h5_to_nexus_dict(nxtreedict) diff --git a/silx/io/h5py_utils.py b/silx/io/h5py_utils.py new file mode 100644 index 0000000..cbdb44a --- /dev/null +++ b/silx/io/h5py_utils.py @@ -0,0 +1,317 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall 
be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +""" +This module provides utility methods on top of h5py, mainly to handle +parallel writing and reading. +""" + +__authors__ = ["W. de Nolf"] +__license__ = "MIT" +__date__ = "27/01/2020" + + +import os +import traceback +import h5py + +from .._version import calc_hexversion +from ..utils import retry as retry_mod + +H5PY_HEX_VERSION = calc_hexversion(*h5py.version.version_tuple[:3]) +HDF5_HEX_VERSION = calc_hexversion(*h5py.version.hdf5_version_tuple[:3]) + +HDF5_SWMR_VERSION = calc_hexversion(*h5py.get_config().swmr_min_hdf5_version[:3]) +HDF5_TRACK_ORDER_VERSION = calc_hexversion(2, 9, 0) + +HAS_SWMR = HDF5_HEX_VERSION >= HDF5_SWMR_VERSION +HAS_TRACK_ORDER = H5PY_HEX_VERSION >= HDF5_TRACK_ORDER_VERSION + + +def _is_h5py_exception(e): + for frame in traceback.walk_tb(e.__traceback__): + if frame[0].f_locals.get("__package__", None) == "h5py": + return True + return False + + +def _retry_h5py_error(e): + """ + :param BaseException e: + :returns bool: + """ + if _is_h5py_exception(e): + if isinstance(e, (OSError, RuntimeError)): + return True + elif isinstance(e, KeyError): + # For example this needs to be retried: + # KeyError: 'Unable to open object (bad object header version number)' + return "Unable to open object" in str(e) + elif isinstance(e, retry_mod.RetryError): + return True + return False + + +def retry(**kw): + 
"""Decorator for a method that needs to be executed until it not longer + fails on HDF5 IO. Mainly used for reading an HDF5 file that is being + written. + + :param \**kw: see `silx.utils.retry` + """ + kw.setdefault("retry_on_error", _retry_h5py_error) + return retry_mod.retry(**kw) + + +def retry_contextmanager(**kw): + """Decorator to make a context manager from a method that needs to be + entered until it not longer fails on HDF5 IO. Mainly used for reading + an HDF5 file that is being written. + + :param \**kw: see `silx.utils.retry_contextmanager` + """ + kw.setdefault("retry_on_error", _retry_h5py_error) + return retry_mod.retry_contextmanager(**kw) + + +def retry_in_subprocess(**kw): + """Same as `retry` but it also retries segmentation faults. + + On Window you cannot use this decorator with the "@" syntax: + + .. code-block:: python + + def _method(*args, **kw): + ... + + method = retry_in_subprocess()(_method) + + :param \**kw: see `silx.utils.retry_in_subprocess` + """ + kw.setdefault("retry_on_error", _retry_h5py_error) + return retry_mod.retry_in_subprocess(**kw) + + +def group_has_end_time(h5item): + """Returns True when the HDF5 item is a Group with an "end_time" + dataset. A reader can use this as an indication that the Group + has been fully written (at least if the writer supports this). + + :param Union[h5py.Group,h5py.Dataset] h5item: + :returns bool: + """ + if isinstance(h5item, h5py.Group): + return "end_time" in h5item + else: + return False + + +@retry_contextmanager() +def open_item(filename, name, retry_invalid=False, validate=None): + """Yield an HDF5 dataset or group (retry until it can be instantiated). 
+ + :param str filename: + :param bool retry_invalid: retry when item is missing or not valid + :param callable or None validate: + :yields Dataset, Group or None: + """ + with File(filename) as h5file: + try: + item = h5file[name] + except KeyError as e: + if "doesn't exist" in str(e): + if retry_invalid: + raise retry_mod.RetryError + else: + item = None + else: + raise + if callable(validate) and item is not None: + if not validate(item): + if retry_invalid: + raise retry_mod.RetryError + else: + item = None + yield item + + +def _top_level_names(filename, include_only=group_has_end_time): + """Return all valid top-level HDF5 names. + + :param str filename: + :param callable or None include_only: + :returns list(str): + """ + with File(filename) as h5file: + try: + if callable(include_only): + return [name for name in h5file["/"] if include_only(h5file[name])] + else: + return list(h5file["/"]) + except KeyError: + raise retry_mod.RetryError + + +top_level_names = retry()(_top_level_names) +safe_top_level_names = retry_in_subprocess()(_top_level_names) + + +class File(h5py.File): + """Takes care of HDF5 file locking and SWMR mode without the need + to handle those explicitely. + + When using this class, you cannot open different files simultatiously + with different modes because the locking flag is an environment variable. + """ + + _HDF5_FILE_LOCKING = None + _NOPEN = 0 + _SWMR_LIBVER = "latest" + + def __init__( + self, + filename, + mode=None, + enable_file_locking=None, + swmr=None, + libver=None, + **kwargs + ): + """The arguments `enable_file_locking` and `swmr` should not be + specified explicitly for normal use cases. + + :param str filename: + :param str or None mode: read-only by default + :param bool or None enable_file_locking: by default it is disabled for `mode='r'` + and `swmr=False` and enabled for all + other modes. 
+ :param bool or None swmr: try both modes when `mode='r'` and `swmr=None` + :param **kwargs: see `h5py.File.__init__` + """ + if mode is None: + mode = "r" + elif mode not in ("r", "w", "w-", "x", "a", "r+"): + raise ValueError("invalid mode {}".format(mode)) + if not HAS_SWMR: + swmr = False + + if enable_file_locking is None: + enable_file_locking = bool(mode != "r" or swmr) + if self._NOPEN: + self._check_locking_env(enable_file_locking) + else: + self._set_locking_env(enable_file_locking) + + if swmr and libver is None: + libver = self._SWMR_LIBVER + + if HAS_TRACK_ORDER: + kwargs.setdefault("track_order", True) + try: + super().__init__(filename, mode=mode, swmr=swmr, libver=libver, **kwargs) + except OSError as e: + # wlock wSWMR rlock rSWMR OSError: Unable to open file (...) + # 1 TRUE FALSE FALSE FALSE - + # 2 TRUE FALSE FALSE TRUE - + # 3 TRUE FALSE TRUE FALSE unable to lock file, errno = 11, error message = 'Resource temporarily unavailable' + # 4 TRUE FALSE TRUE TRUE unable to lock file, errno = 11, error message = 'Resource temporarily unavailable' + # 5 TRUE TRUE FALSE FALSE file is already open for write (may use <h5clear file> to clear file consistency flags) + # 6 TRUE TRUE FALSE TRUE - + # 7 TRUE TRUE TRUE FALSE file is already open for write (may use <h5clear file> to clear file consistency flags) + # 8 TRUE TRUE TRUE TRUE - + if ( + mode == "r" + and swmr is None + and "file is already open for write" in str(e) + ): + # Try reading in SWMR mode (situation 5 and 7) + swmr = True + if libver is None: + libver = self._SWMR_LIBVER + super().__init__( + filename, mode=mode, swmr=swmr, libver=libver, **kwargs + ) + else: + raise + else: + self._add_nopen(1) + try: + if mode != "r" and swmr: + # Try setting writer in SWMR mode + self.swmr_mode = True + except Exception: + self.close() + raise + + @classmethod + def _add_nopen(cls, v): + cls._NOPEN = max(cls._NOPEN + v, 0) + + def close(self): + super().close() + self._add_nopen(-1) + if not 
self._NOPEN: + self._restore_locking_env() + + def _set_locking_env(self, enable): + self._backup_locking_env() + if enable: + os.environ["HDF5_USE_FILE_LOCKING"] = "TRUE" + elif enable is None: + try: + del os.environ["HDF5_USE_FILE_LOCKING"] + except KeyError: + pass + else: + os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" + + def _get_locking_env(self): + v = os.environ.get("HDF5_USE_FILE_LOCKING") + if v == "TRUE": + return True + elif v is None: + return None + else: + return False + + def _check_locking_env(self, enable): + if enable != self._get_locking_env(): + if enable: + raise RuntimeError( + "Close all HDF5 files before enabling HDF5 file locking" + ) + else: + raise RuntimeError( + "Close all HDF5 files before disabling HDF5 file locking" + ) + + def _backup_locking_env(self): + v = os.environ.get("HDF5_USE_FILE_LOCKING") + if v is None: + self._HDF5_FILE_LOCKING = None + else: + self._HDF5_FILE_LOCKING = v == "TRUE" + + def _restore_locking_env(self): + self._set_locking_env(self._HDF5_FILE_LOCKING) + self._HDF5_FILE_LOCKING = None diff --git a/silx/io/spech5.py b/silx/io/spech5.py index 79fd2e4..1eaec7c 100644 --- a/silx/io/spech5.py +++ b/silx/io/spech5.py @@ -1,6 +1,6 @@ # coding: utf-8 # /*########################################################################## -# Copyright (C) 2016-2018 European Synchrotron Radiation Facility +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -194,7 +194,7 @@ import numpy import six from silx import version as silx_version -from .specfile import SpecFile +from .specfile import SpecFile, SfErrColNotFound from . import commonh5 __authors__ = ["P. Knobel", "D. 
Naudet"] @@ -670,6 +670,10 @@ class PositionersGroup(commonh5.Group, SpecH5Group): def __init__(self, parent, scan): commonh5.Group.__init__(self, name="positioners", parent=parent, attrs={"NX_class": to_h5py_utf8("NXcollection")}) + + dataset_info = [] # Store list of positioner's (name, value) + is_error = False # True if error encountered + for motor_name in scan.motor_names: safe_motor_name = motor_name.replace("/", "%") if motor_name in scan.labels and scan.data.shape[0] > 0: @@ -678,10 +682,24 @@ class PositionersGroup(commonh5.Group, SpecH5Group): else: # Take value from #P scan header. # (may return float("inf") if #P line is missing from scan hdr) - motor_value = scan.motor_position_by_name(motor_name) - self.add_node(SpecH5NodeDataset(name=safe_motor_name, - data=motor_value, - parent=self)) + try: + motor_value = scan.motor_position_by_name(motor_name) + except SfErrColNotFound: + is_error = True + motor_value = float('inf') + dataset_info.append((safe_motor_name, motor_value)) + + if is_error: # Filter-out scalar values + logger1.warning("Mismatching number of elements in #P and #O: Ignoring") + dataset_info = [ + (name, value) for name, value in dataset_info + if not isinstance(value, float)] + + for name, value in dataset_info: + self.add_node(SpecH5NodeDataset( + name=name, + data=value, + parent=self)) class InstrumentMcaGroup(commonh5.Group, SpecH5Group): diff --git a/silx/io/test/__init__.py b/silx/io/test/__init__.py index a309ee9..68b6e9b 100644 --- a/silx/io/test/__init__.py +++ b/silx/io/test/__init__.py @@ -40,6 +40,7 @@ from .test_nxdata import suite as test_nxdata_suite from .test_commonh5 import suite as test_commonh5_suite from .test_rawh5 import suite as test_rawh5_suite from .test_url import suite as test_url_suite +from .test_h5py_utils import suite as test_h5py_utils_suite def suite(): @@ -56,4 +57,5 @@ def suite(): test_suite.addTest(test_commonh5_suite()) test_suite.addTest(test_rawh5_suite()) test_suite.addTest(test_url_suite()) + 
test_suite.addTest(test_h5py_utils_suite()) return test_suite diff --git a/silx/io/test/test_dictdump.py b/silx/io/test/test_dictdump.py index b99116b..93c9183 100644 --- a/silx/io/test/test_dictdump.py +++ b/silx/io/test/test_dictdump.py @@ -33,6 +33,7 @@ import os import tempfile import unittest import h5py +from copy import deepcopy from collections import defaultdict @@ -72,7 +73,63 @@ link_attrs["links"]["absolute_softlink"] = h5py.SoftLink("/links/group/dataset") link_attrs["links"]["external_link"] = h5py.ExternalLink(ext_filename, "/ext_group/dataset") -class TestDictToH5(unittest.TestCase): +class DictTestCase(unittest.TestCase): + + def assertRecursiveEqual(self, expected, actual, nodes=tuple()): + err_msg = "\n\n Tree nodes: {}".format(nodes) + if isinstance(expected, dict): + self.assertTrue(isinstance(actual, dict), msg=err_msg) + self.assertEqual( + set(expected.keys()), + set(actual.keys()), + msg=err_msg + ) + for k in actual: + self.assertRecursiveEqual( + expected[k], + actual[k], + nodes=nodes + (k,), + ) + return + if isinstance(actual, numpy.ndarray): + actual = actual.tolist() + if isinstance(expected, numpy.ndarray): + expected = expected.tolist() + + self.assertEqual(expected, actual, msg=err_msg) + + +class H5DictTestCase(DictTestCase): + + def _dictRoundTripNormalize(self, treedict): + """Convert the dictionary as expected from a round-trip + treedict -> dicttoh5 -> h5todict -> newtreedict + """ + for key, value in list(treedict.items()): + if isinstance(value, dict): + self._dictRoundTripNormalize(value) + + # Expand treedict[("group", "attr_name")] + # to treedict["group"]["attr_name"] + for key, value in list(treedict.items()): + if not isinstance(key, tuple): + continue + # Put the attribute inside the group + grpname, attr = key + if not grpname: + continue + group = treedict.setdefault(grpname, dict()) + if isinstance(group, dict): + del treedict[key] + group[("", attr)] = value + + def dictRoundTripNormalize(self, treedict): + 
treedict2 = deepcopy(treedict) + self._dictRoundTripNormalize(treedict2) + return treedict2 + + +class TestDictToH5(H5DictTestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5") @@ -110,14 +167,13 @@ class TestDictToH5(unittest.TestCase): min(ddict["city attributes"]["Europe"]["France"]["Grenoble"]["coordinates"]), 5.7196) - def testH5Overwrite(self): + def testH5OverwriteDeprecatedApi(self): dd = ConfigDict({'t': True}) dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a') dd = ConfigDict({'t': False}) - with TestLogging(dictdump_logger, warning=1): - dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a', - overwrite_data=False) + dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a', + overwrite_data=False) res = h5todict(self.h5_fname) assert(res['t'] == True) @@ -200,8 +256,7 @@ class TestDictToH5(unittest.TestCase): ("group", "attr"): 10, } with h5py.File(self.h5_fname, "w") as h5file: - with TestLogging(dictdump_logger, warning=1): - dictdump.dicttoh5(ddict, h5file) + dictdump.dicttoh5(ddict, h5file) self.assertEqual(h5file["group"].attrs['attr'], 10) def testFlatDict(self): @@ -241,8 +296,223 @@ class TestDictToH5(unittest.TestCase): numpy.testing.assert_array_equal(h5py_read_dataset(h5file["darks"]["0"]), ddict['darks']['0']) - -class TestH5ToDict(unittest.TestCase): + def testOverwrite(self): + # Tree structure that will be tested + group1 = { + ("", "attr2"): "original2", + "dset1": 0, + "dset2": [0, 1], + ("dset1", "attr1"): "original1", + ("dset1", "attr2"): "original2", + ("dset2", "attr1"): "original1", + ("dset2", "attr2"): "original2", + } + group2 = { + "subgroup1": group1.copy(), + "subgroup2": group1.copy(), + ("subgroup1", "attr1"): "original1", + ("subgroup2", "attr1"): "original1" + } + group2.update(group1) + # initial HDF5 tree + otreedict = { + ('', 'attr1'): "original1", + ('', 'attr2'): "original2", + 'group1': group1, + 'group2': group2, + ('group1', 'attr1'): "original1", + 
('group2', 'attr1'): "original1" + } + wtreedict = None # dumped dictionary + etreedict = None # expected HDF5 tree after dump + + def reset_file(): + dicttoh5( + otreedict, + h5file=self.h5_fname, + mode="w", + ) + + def append_file(update_mode): + dicttoh5( + wtreedict, + h5file=self.h5_fname, + mode="a", + update_mode=update_mode + ) + + def assert_file(): + rtreedict = h5todict( + self.h5_fname, + include_attributes=True, + asarray=False + ) + netreedict = self.dictRoundTripNormalize(etreedict) + try: + self.assertRecursiveEqual(netreedict, rtreedict) + except AssertionError: + from pprint import pprint + print("\nDUMP:") + pprint(wtreedict) + print("\nEXPECTED:") + pprint(netreedict) + print("\nHDF5:") + pprint(rtreedict) + raise + + def assert_append(update_mode): + append_file(update_mode) + assert_file() + + # Test wrong arguments + with self.assertRaises(ValueError): + dicttoh5( + otreedict, + h5file=self.h5_fname, + mode="w", + update_mode="wrong-value" + ) + + # No writing + reset_file() + etreedict = deepcopy(otreedict) + assert_file() + + # Write identical dictionary + wtreedict = deepcopy(otreedict) + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add", "modify", "replace"]: + assert_append(update_mode) + + # Write empty dictionary + wtreedict = dict() + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add", "modify", "replace"]: + assert_append(update_mode) + + # Modified dataset + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]} + wtreedict["group2"]["subgroup2"]["dset2"] = [10, 20] + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + etreedict["group2"]["subgroup2"]["dset2"] = [10, 20] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] 
= dict() + etreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]} + etreedict["group2"]["subgroup2"]["dset2"] = [10, 20] + assert_append("replace") + + # Modified group + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = [0, 1] + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add", "modify"]: + assert_append(update_mode) + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = [0, 1] + assert_append("replace") + + # Modified attribute + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified" + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + etreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified" + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = dict() + etreedict["group2"]["subgroup2"]["dset1"] = dict() + etreedict["group2"]["subgroup2"]["dset1"][("", "attr1")] = "modified" + assert_append("replace") + + # Delete group + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = None + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + del etreedict["group2"]["subgroup2"] + del etreedict["group2"][("subgroup2", "attr1")] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + assert_append("replace") + + # Delete dataset + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"]["dset2"] = None + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + del etreedict["group2"]["subgroup2"]["dset2"] + del 
etreedict["group2"]["subgroup2"][("dset2", "attr1")] + del etreedict["group2"]["subgroup2"][("dset2", "attr2")] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = dict() + assert_append("replace") + + # Delete attribute + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"][("dset2", "attr1")] = None + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + del etreedict["group2"]["subgroup2"][("dset2", "attr1")] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = dict() + etreedict["group2"]["subgroup2"]["dset2"] = dict() + assert_append("replace") + + +class TestH5ToDict(H5DictTestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5") @@ -313,7 +583,7 @@ class TestH5ToDict(unittest.TestCase): numpy.testing.assert_array_equal(ddict[("", "attr_2utf8")], adict[("", "attr_2utf8")]) -class TestDictToNx(unittest.TestCase): +class TestDictToNx(H5DictTestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.h5_fname = os.path.join(self.tempdir, "nx.h5") @@ -416,8 +686,121 @@ class TestDictToNx(unittest.TestCase): with h5py.File(self.h5_fname, "r") as h5file: self.assertEqual(h5file["/links/group/subgroup/relative_softlink"][()], 10) - -class TestNxToDict(unittest.TestCase): + def testOverwrite(self): + entry_name = "entry" + wtreedict = { + "group1": {"a": 1, "b": 2}, + "group2@attr3": "attr3", + "group2@attr4": "attr4", + "group2": { + "@attr1": "attr1", + "@attr2": "attr2", + "c": 3, + "d": 4, + "dataset4": 8, + "dataset4@units": "keV", + }, + "group3": {"subgroup": {"e": 9, "f": 10}}, + "dataset1": 5, + "dataset2": 6, + "dataset3": 7, + "dataset3@units": "mm", + } + esubtree = { + "@NX_class": 
"NXentry", + "group1": {"@NX_class": "NXcollection", "a": 1, "b": 2}, + "group2": { + "@NX_class": "NXcollection", + "@attr1": "attr1", + "@attr2": "attr2", + "@attr3": "attr3", + "@attr4": "attr4", + "c": 3, + "d": 4, + "dataset4": 8, + "dataset4@units": "keV", + }, + "group3": { + "@NX_class": "NXcollection", + "subgroup": {"@NX_class": "NXcollection", "e": 9, "f": 10}, + }, + "dataset1": 5, + "dataset2": 6, + "dataset3": 7, + "dataset3@units": "mm", + } + etreedict = {entry_name: esubtree} + + def append_file(update_mode, add_nx_class): + dictdump.dicttonx( + wtreedict, + h5file=self.h5_fname, + mode="a", + h5path=entry_name, + update_mode=update_mode, + add_nx_class=add_nx_class + ) + + def assert_file(): + rtreedict = dictdump.nxtodict( + self.h5_fname, + include_attributes=True, + asarray=False, + ) + netreedict = self.dictRoundTripNormalize(etreedict) + try: + self.assertRecursiveEqual(netreedict, rtreedict) + except AssertionError: + from pprint import pprint + print("\nDUMP:") + pprint(wtreedict) + print("\nEXPECTED:") + pprint(netreedict) + print("\nHDF5:") + pprint(rtreedict) + raise + + def assert_append(update_mode, add_nx_class=None): + append_file(update_mode, add_nx_class=add_nx_class) + assert_file() + + # First to an empty file + assert_append(None) + + # Add non-existing attributes/datasets/groups + wtreedict["group1"].pop("a") + wtreedict["group2"].pop("@attr1") + wtreedict["group2"]["@attr2"] = "attr3" # only for update + wtreedict["group2"]["@type"] = "test" + wtreedict["group2"]["dataset4"] = 9 # only for update + del wtreedict["group2"]["dataset4@units"] + wtreedict["group3"] = {} + esubtree["group2"]["@type"] = "test" + assert_append("add") + + # Add/update existing attributes and datasets + esubtree["group2"]["@attr2"] = "attr3" + esubtree["group2"]["dataset4"] = 9 + assert_append("modify") + + # Do not add missing NX_class by default when updating + wtreedict["group2"]["@NX_class"] = "NXprocess" + esubtree["group2"]["@NX_class"] = 
"NXprocess" + assert_append("modify") + del wtreedict["group2"]["@NX_class"] + assert_append("modify") + + # Overwrite existing groups/datasets/attributes + esubtree["group1"].pop("a") + esubtree["group2"].pop("@attr1") + esubtree["group2"]["@NX_class"] = "NXcollection" + esubtree["group2"]["dataset4"] = 9 + del esubtree["group2"]["dataset4@units"] + esubtree["group3"] = {"@NX_class": "NXcollection"} + assert_append("replace", add_nx_class=True) + + +class TestNxToDict(H5DictTestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.h5_fname = os.path.join(self.tempdir, "nx.h5") @@ -510,7 +893,7 @@ class TestNxToDict(unittest.TestCase): h5todict(self.h5_fname, path="/Mars", errors='raise') -class TestDictToJson(unittest.TestCase): +class TestDictToJson(DictTestCase): def setUp(self): self.dir_path = tempfile.mkdtemp() self.json_fname = os.path.join(self.dir_path, "cityattrs.json") @@ -528,7 +911,7 @@ class TestDictToJson(unittest.TestCase): self.assertIn('"inhabitants": 160215', json_content) -class TestDictToIni(unittest.TestCase): +class TestDictToIni(DictTestCase): def setUp(self): self.dir_path = tempfile.mkdtemp() self.ini_fname = os.path.join(self.dir_path, "test.ini") diff --git a/silx/io/test/test_h5py_utils.py b/silx/io/test/test_h5py_utils.py new file mode 100644 index 0000000..2e2e3dd --- /dev/null +++ b/silx/io/test/test_h5py_utils.py @@ -0,0 +1,397 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the 
following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for h5py utilities""" + +__authors__ = ["W. de Nolf"] +__license__ = "MIT" +__date__ = "27/01/2020" + + +import unittest +import os +import sys +import time +import shutil +import tempfile +import threading +import multiprocessing +from contextlib import contextmanager + +from .. 
import h5py_utils +from ...utils.retry import RetryError, RetryTimeoutError + +IS_WINDOWS = sys.platform == "win32" + + +def _subprocess_context_main(queue, contextmgr, *args, **kw): + try: + with contextmgr(*args, **kw): + queue.put(None) + threading.Event().wait() + except Exception: + queue.put(None) + raise + + +@contextmanager +def _subprocess_context(contextmgr, *args, **kw): + timeout = kw.pop("timeout", 10) + queue = multiprocessing.Queue(maxsize=1) + p = multiprocessing.Process( + target=_subprocess_context_main, args=(queue, contextmgr) + args, kwargs=kw + ) + p.start() + try: + queue.get(timeout=timeout) + yield + finally: + try: + p.kill() + except AttributeError: + p.terminate() + p.join(timeout) + + +@contextmanager +def _open_context(filename, **kw): + with h5py_utils.File(filename, **kw) as f: + if kw.get("mode") == "w": + f["check"] = True + f.flush() + yield f + + +def _cause_segfault(): + import ctypes + + i = ctypes.c_char(b"a") + j = ctypes.pointer(i) + c = 0 + while True: + j[c] = b"a" + c += 1 + + +def _top_level_names_test(txtfilename, *args, **kw): + sys.stderr = open(os.devnull, "w") + + with open(txtfilename, mode="r") as f: + failcounter = int(f.readline().strip()) + + ncausefailure = kw.pop("ncausefailure") + faildelay = kw.pop("faildelay") + if failcounter < ncausefailure: + time.sleep(faildelay) + failcounter += 1 + with open(txtfilename, mode="w") as f: + f.write(str(failcounter)) + if failcounter % 2: + raise RetryError + else: + _cause_segfault() + return h5py_utils._top_level_names(*args, **kw) + + +top_level_names_test = h5py_utils.retry_in_subprocess()(_top_level_names_test) + + +def subtests(test): + def wrapper(self): + for _ in self._subtests(): + with self.subTest(**self._subtest_options): + test(self) + + return wrapper + + +class TestH5pyUtils(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _subtests(self): + self._subtest_options = 
{"mode": "w"} + self.filename_generator = self._filenames() + yield + self._subtest_options = {"mode": "w", "libver": "latest"} + self.filename_generator = self._filenames() + yield + + @property + def _liber_allows_concurrent_access(self): + return self._subtest_options.get("libver") in [None, "earliest", "v18"] + + def _filenames(self): + i = 1 + while True: + filename = os.path.join(self.test_dir, "file{}.h5".format(i)) + with self._open_context(filename): + pass + yield filename + i += 1 + + def _new_filename(self): + return next(self.filename_generator) + + @contextmanager + def _open_context(self, filename, **kwargs): + kw = self._subtest_options + kw.update(kwargs) + with _open_context(filename, **kw) as f: + + yield f + + @contextmanager + def _open_context_subprocess(self, filename, **kwargs): + kw = self._subtest_options + kw.update(kwargs) + with _subprocess_context(_open_context, filename, **kw): + yield + + def _assert_hdf5_data(self, f): + self.assertTrue(f["check"][()]) + + def _validate_hdf5_data(self, filename, swmr=False): + with self._open_context(filename, mode="r") as f: + self.assertEqual(f.swmr_mode, swmr) + self._assert_hdf5_data(f) + + @subtests + def test_modes_single_process(self): + orig = os.environ.get("HDF5_USE_FILE_LOCKING") + filename1 = self._new_filename() + self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING")) + filename2 = self._new_filename() + self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING")) + with self._open_context(filename1, mode="r"): + with self._open_context(filename2, mode="r"): + pass + for mode in ["w", "a"]: + with self.assertRaises(RuntimeError): + with self._open_context(filename2, mode=mode): + pass + self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING")) + with self._open_context(filename1, mode="a"): + for mode in ["w", "a"]: + with self._open_context(filename2, mode=mode): + pass + with self.assertRaises(RuntimeError): + with self._open_context(filename2, mode="r"): + pass + 
self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING")) + + @subtests + def test_modes_multi_process(self): + if not self._liber_allows_concurrent_access: + # A concurrent reader with HDF5_USE_FILE_LOCKING=FALSE + # no longer works with HDF5 >=1.10 (you get an exception + # when trying to open the file) + return + filename = self._new_filename() + + # File open by truncating writer + with self._open_context_subprocess(filename, mode="w"): + with self._open_context(filename, mode="r") as f: + self._assert_hdf5_data(f) + if IS_WINDOWS: + with self._open_context(filename, mode="a") as f: + self._assert_hdf5_data(f) + else: + with self.assertRaises(OSError): + with self._open_context(filename, mode="a") as f: + pass + self._validate_hdf5_data(filename) + + # File open by appending writer + with self._open_context_subprocess(filename, mode="a"): + with self._open_context(filename, mode="r") as f: + self._assert_hdf5_data(f) + if IS_WINDOWS: + with self._open_context(filename, mode="a") as f: + self._assert_hdf5_data(f) + else: + with self.assertRaises(OSError): + with self._open_context(filename, mode="a") as f: + pass + self._validate_hdf5_data(filename) + + # File open by reader + with self._open_context_subprocess(filename, mode="r"): + with self._open_context(filename, mode="r") as f: + self._assert_hdf5_data(f) + with self._open_context(filename, mode="a") as f: + pass + self._validate_hdf5_data(filename) + + # File open by locking reader + with _subprocess_context( + _open_context, filename, mode="r", enable_file_locking=True + ): + with self._open_context(filename, mode="r") as f: + self._assert_hdf5_data(f) + if IS_WINDOWS: + with self._open_context(filename, mode="a") as f: + self._assert_hdf5_data(f) + else: + with self.assertRaises(OSError): + with self._open_context(filename, mode="a") as f: + pass + self._validate_hdf5_data(filename) + + @subtests + @unittest.skipIf(not h5py_utils.HAS_SWMR, "SWMR not supported") + def 
test_modes_multi_process_swmr(self): + filename = self._new_filename() + + with self._open_context(filename, mode="w", libver="latest") as f: + pass + + # File open by SWMR writer + with self._open_context_subprocess(filename, mode="a", swmr=True): + with self._open_context(filename, mode="r") as f: + assert f.swmr_mode + self._assert_hdf5_data(f) + with self.assertRaises(OSError): + with self._open_context(filename, mode="a") as f: + pass + self._validate_hdf5_data(filename, swmr=True) + + @subtests + def test_retry_defaults(self): + filename = self._new_filename() + + names = h5py_utils.top_level_names(filename) + self.assertEqual(names, []) + + names = h5py_utils.safe_top_level_names(filename) + self.assertEqual(names, []) + + names = h5py_utils.top_level_names(filename, include_only=None) + self.assertEqual(names, ["check"]) + + names = h5py_utils.safe_top_level_names(filename, include_only=None) + self.assertEqual(names, ["check"]) + + with h5py_utils.open_item(filename, "/check", validate=lambda x: False) as item: + self.assertEqual(item, None) + + with h5py_utils.open_item(filename, "/check", validate=None) as item: + self.assertTrue(item[()]) + + with self.assertRaises(RetryTimeoutError): + with h5py_utils.open_item( + filename, + "/check", + retry_timeout=0.1, + retry_invalid=True, + validate=lambda x: False, + ) as item: + pass + + ncall = 0 + + def validate(item): + nonlocal ncall + if ncall >= 1: + return True + else: + ncall += 1 + raise RetryError + + with h5py_utils.open_item( + filename, "/check", validate=validate, retry_timeout=1, retry_invalid=True + ) as item: + self.assertTrue(item[()]) + + @subtests + def test_retry_custom(self): + filename = self._new_filename() + ncausefailure = 3 + faildelay = 0.1 + sufficient_timeout = ncausefailure * (faildelay + 10) + insufficient_timeout = ncausefailure * faildelay * 0.5 + + @h5py_utils.retry_contextmanager() + def open_item(filename, name): + nonlocal failcounter + if failcounter < ncausefailure: + 
time.sleep(faildelay) + failcounter += 1 + raise RetryError + with h5py_utils.File(filename) as h5file: + yield h5file[name] + + failcounter = 0 + kw = {"retry_timeout": sufficient_timeout} + with open_item(filename, "/check", **kw) as item: + self.assertTrue(item[()]) + + failcounter = 0 + kw = {"retry_timeout": insufficient_timeout} + with self.assertRaises(RetryTimeoutError): + with open_item(filename, "/check", **kw) as item: + pass + + @subtests + def test_retry_in_subprocess(self): + filename = self._new_filename() + txtfilename = os.path.join(self.test_dir, "failcounter.txt") + ncausefailure = 3 + faildelay = 0.1 + sufficient_timeout = ncausefailure * (faildelay + 10) + insufficient_timeout = ncausefailure * faildelay * 0.5 + + kw = { + "retry_timeout": sufficient_timeout, + "include_only": None, + "ncausefailure": ncausefailure, + "faildelay": faildelay, + } + with open(txtfilename, mode="w") as f: + f.write("0") + names = top_level_names_test(txtfilename, filename, **kw) + self.assertEqual(names, ["check"]) + + kw = { + "retry_timeout": insufficient_timeout, + "include_only": None, + "ncausefailure": ncausefailure, + "faildelay": faildelay, + } + with open(txtfilename, mode="w") as f: + f.write("0") + with self.assertRaises(RetryTimeoutError): + top_level_names_test(txtfilename, filename, **kw) + + +def suite(): + test_suite = unittest.TestSuite() + test_suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestH5pyUtils)) + return test_suite + + +if __name__ == "__main__": + unittest.main(defaultTest="suite") diff --git a/silx/io/url.py b/silx/io/url.py index 044977c..66b75f0 100644 --- a/silx/io/url.py +++ b/silx/io/url.py @@ -344,6 +344,8 @@ class DataUrl(object): :rtype: bool """ file_path = self.file_path() + if file_path is None: + return False if len(file_path) > 0: if file_path[0] == "/": return True |