summaryrefslogtreecommitdiff
path: root/silx/io/dictdump.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/io/dictdump.py')
-rw-r--r--  silx/io/dictdump.py  303
1 files changed, 228 insertions, 75 deletions
diff --git a/silx/io/dictdump.py b/silx/io/dictdump.py
index bbb244a..e907668 100644
--- a/silx/io/dictdump.py
+++ b/silx/io/dictdump.py
@@ -26,6 +26,7 @@ by text strings to following file formats: `HDF5, INI, JSON`
"""
from collections import OrderedDict
+from collections.abc import Mapping
import json
import logging
import numpy
@@ -34,11 +35,16 @@ import sys
import h5py
from .configdict import ConfigDict
-from .utils import is_group, is_link, is_softlink, is_externallink
+from .utils import is_group
+from .utils import is_dataset
+from .utils import is_link
+from .utils import is_softlink
+from .utils import is_externallink
from .utils import is_file as is_h5_file_like
from .utils import open as h5open
from .utils import h5py_read_dataset
from .utils import H5pyAttributesReadWrapper
+from silx.utils.deprecation import deprecated_warning
__authors__ = ["P. Knobel"]
__license__ = "MIT"
@@ -66,7 +72,7 @@ def _prepare_hdf5_write_value(array_like):
return array
-class _SafeH5FileWrite(object):
+class _SafeH5FileWrite:
"""Context manager returning a :class:`h5py.File` object.
If this object is initialized with a file path, we open the file
@@ -82,7 +88,6 @@ class _SafeH5FileWrite(object):
"""
def __init__(self, h5file, mode="w"):
"""
-
:param h5file: HDF5 file path or :class:`h5py.File` instance
:param str mode: Can be ``"r+"`` (read/write, file must exist),
``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
@@ -106,7 +111,7 @@ class _SafeH5FileWrite(object):
self.h5file.close()
-class _SafeH5FileRead(object):
+class _SafeH5FileRead:
"""Context manager returning a :class:`h5py.File` or a
:class:`silx.io.spech5.SpecH5` or a :class:`silx.io.fabioh5.File` object.
@@ -136,18 +141,48 @@ class _SafeH5FileRead(object):
self.h5file.close()
+def _normalize_h5_path(h5root, h5path):
+ """
+ :param h5root: File name or h5py-like File, Group or Dataset
+ :param str h5path: relative to ``h5root``
+ :returns: 2-tuple (File or file object, h5path)
+ """
+ if is_group(h5root):
+ group_name = h5root.name
+ if group_name == "/":
+ pass
+ elif h5path:
+ h5path = group_name + "/" + h5path
+ else:
+ h5path = group_name
+ h5file = h5root.file
+ elif is_dataset(h5root):
+ h5path = h5root.name
+ h5file = h5root.file
+ else:
+ h5file = h5root
+ if not h5path:
+ h5path = "/"
+ elif not h5path.endswith("/"):
+ h5path += "/"
+ return h5file, h5path
+
+
def dicttoh5(treedict, h5file, h5path='/',
- mode="w", overwrite_data=False,
- create_dataset_args=None):
+ mode="w", overwrite_data=None,
+ create_dataset_args=None, update_mode=None):
"""Write a nested dictionary to a HDF5 file, using keys as member names.
If a dictionary value is a sub-dictionary, a group is created. If it is
any other data type, it is cast into a numpy array and written as a
:mod:`h5py` dataset. Dictionary keys must be strings and cannot contain
the ``/`` character.
-
+
If dictionary keys are tuples they are interpreted to set h5 attributes.
- The tuples should have the format (dataset_name,attr_name)
+ The tuples should have the format (dataset_name, attr_name).
+
+ Existing HDF5 items can be deleted by providing the dictionary value
+ ``None``, provided that ``update_mode in ["modify", "replace"]``.
.. note::
@@ -158,21 +193,29 @@ def dicttoh5(treedict, h5file, h5path='/',
to define sub trees. If tuples are used as keys they should have the
format (dataset_name, attr_name) and will add an h5 attribute with the
corresponding value.
- :param h5file: HDF5 file name or handle. If a file name is provided, the
- function opens the file in the specified mode and closes it again
- before completing.
- :param h5path: Target path in HDF5 file in which scan groups are created.
+ :param h5file: File name or h5py-like File, Group or Dataset
+ :param h5path: Target path in the HDF5 file relative to ``h5file``.
Default is root (``"/"``)
:param mode: Can be ``"r+"`` (read/write, file must exist),
``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
exists) or ``"a"`` (read/write if exists, create otherwise).
This parameter is ignored if ``h5file`` is a file handle.
- :param overwrite_data: If ``True``, existing groups and datasets can be
- overwritten, if ``False`` they are skipped. This parameter is only
- relevant if ``h5file_mode`` is ``"r+"`` or ``"a"``.
+ :param overwrite_data: Deprecated. ``True`` is approximately equivalent
+ to ``update_mode="modify"`` and ``False`` is equivalent to
+ ``update_mode="add"``.
:param create_dataset_args: Dictionary of args you want to pass to
``h5f.create_dataset``. This allows you to specify filters and
compression parameters. Don't specify ``name`` and ``data``.
+ :param update_mode: Can be ``add`` (default), ``modify`` or ``replace``.
+
+ * ``add``: Extend the existing HDF5 tree when possible. Existing HDF5
+ items (groups, datasets and attributes) remain untouched.
+ * ``modify``: Extend the existing HDF5 tree when possible, modify
+ existing attributes, modify same-sized dataset values and delete
+ HDF5 items with a ``None`` value in the dict tree.
+ * ``replace``: Replace the existing HDF5 tree. Items from the root of
+ the HDF5 tree that are not present in the root of the dict tree
+ will remain untouched.
Example::
@@ -201,44 +244,110 @@ def dicttoh5(treedict, h5file, h5path='/',
create_dataset_args=create_ds_args)
"""
- if not h5path.endswith("/"):
- h5path += "/"
+ if overwrite_data is not None:
+ reason = (
+ "`overwrite_data=True` becomes `update_mode='modify'` and "
+ "`overwrite_data=False` becomes `update_mode='add'`"
+ )
+ deprecated_warning(
+ type_="argument",
+ name="overwrite_data",
+ reason=reason,
+ replacement="update_mode",
+ since_version="0.15",
+ )
+
+ if update_mode is None:
+ if overwrite_data:
+ update_mode = "modify"
+ else:
+ update_mode = "add"
+ else:
+ valid_existing_values = ("add", "replace", "modify")
+ if update_mode not in valid_existing_values:
+ raise ValueError((
+ "Argument 'update_mode' can only have values: {}"
+ "".format(valid_existing_values)
+ ))
+ if overwrite_data is not None:
+ logger.warning("The argument `overwrite_data` is ignored")
- with _SafeH5FileWrite(h5file, mode=mode) as h5f:
- if isinstance(treedict, dict) and h5path != "/":
- if h5path not in h5f:
- h5f.create_group(h5path)
+ if not isinstance(treedict, Mapping):
+ raise TypeError("'treedict' must be a dictionary")
- for key in filter(lambda k: not isinstance(k, tuple), treedict):
- key_is_group = isinstance(treedict[key], dict)
- h5name = h5path + key
+ h5file, h5path = _normalize_h5_path(h5file, h5path)
- if key_is_group and treedict[key]:
- # non-empty group: recurse
- dicttoh5(treedict[key], h5f, h5name,
- overwrite_data=overwrite_data,
- create_dataset_args=create_dataset_args)
- continue
+ def _iter_treedict(attributes=False):
+ nonlocal treedict
+ for key, value in treedict.items():
+ if isinstance(key, tuple) == attributes:
+ yield key, value
- if h5name in h5f:
- # key already exists: delete or skip
- if overwrite_data is True:
- del h5f[h5name]
+ change_allowed = update_mode in ("replace", "modify")
+
+ with _SafeH5FileWrite(h5file, mode=mode) as h5f:
+ # Create the root of the tree
+ if h5path in h5f:
+ if not is_group(h5f[h5path]):
+ if update_mode == "replace":
+ del h5f[h5path]
+ h5f.create_group(h5path)
else:
- logger.warning('key (%s) already exists. '
- 'Not overwriting.' % (h5name))
- continue
+ return
+ else:
+ h5f.create_group(h5path)
- value = treedict[key]
+ # Loop over all groups, links and datasets
+ for key, value in _iter_treedict(attributes=False):
+ h5name = h5path + key
+ exists = h5name in h5f
- if value is None or key_is_group:
- # Create empty group
- h5f.create_group(h5name)
+ if value is None:
+ # Delete HDF5 item
+ if exists and change_allowed:
+ del h5f[h5name]
+ exists = False
+ elif isinstance(value, Mapping):
+ # HDF5 group
+ if exists and update_mode == "replace":
+ del h5f[h5name]
+ exists = False
+ if value:
+ dicttoh5(value, h5f, h5name,
+ update_mode=update_mode,
+ create_dataset_args=create_dataset_args)
+ elif not exists:
+ h5f.create_group(h5name)
elif is_link(value):
- h5f[h5name] = value
+ # HDF5 link
+ if exists and update_mode == "replace":
+ del h5f[h5name]
+ exists = False
+ if not exists:
+ # Create link from h5py link object
+ h5f[h5name] = value
else:
+ # HDF5 dataset
+ if exists and not change_allowed:
+ continue
data = _prepare_hdf5_write_value(value)
- # can't apply filters on scalars (datasets with shape == () )
+
+ # Edit the existing dataset
+ attrs_backup = None
+ if exists:
+ try:
+ h5f[h5name][()] = data
+ continue
+ except Exception:
+ # Delete the existing dataset
+ if update_mode != "replace":
+ if not is_dataset(h5f[h5name]):
+ continue
+ attrs_backup = dict(h5f[h5name].attrs)
+ del h5f[h5name]
+
+ # Create dataset
+ # can't apply filters on scalars (datasets with shape == ())
if data.shape == () or create_dataset_args is None:
h5f.create_dataset(h5name,
data=data)
@@ -246,36 +355,58 @@ def dicttoh5(treedict, h5file, h5path='/',
h5f.create_dataset(h5name,
data=data,
**create_dataset_args)
+ if attrs_backup:
+ h5f[h5name].attrs.update(attrs_backup)
- # deal with h5 attributes which have tuples as keys in treedict
- for key in filter(lambda k: isinstance(k, tuple), treedict):
- assert len(key) == 2, "attribute must be defined by 2 values"
+ # Loop over all attributes
+ for key, value in _iter_treedict(attributes=True):
+ if len(key) != 2:
+ raise ValueError("HDF5 attribute must be described by 2 values")
h5name = h5path + key[0]
attr_name = key[1]
if h5name not in h5f:
- # Create empty group if key for attr does not exist
+ # Create an empty group to store the attribute
h5f.create_group(h5name)
- logger.warning(
- "key (%s) does not exist. attr %s "
- "will be written to ." % (h5name, attr_name)
- )
-
- if attr_name in h5f[h5name].attrs:
- if not overwrite_data:
- logger.warning(
- "attribute %s@%s already exists. Not overwriting."
- "" % (h5name, attr_name)
- )
+
+ h5a = h5f[h5name].attrs
+ exists = attr_name in h5a
+
+ if value is None:
+ # Delete HDF5 attribute
+ if exists and change_allowed:
+ del h5a[attr_name]
+ exists = False
+ else:
+ # Add/modify HDF5 attribute
+ if exists and not change_allowed:
continue
+ data = _prepare_hdf5_write_value(value)
+ h5a[attr_name] = data
- # Write attribute
- value = treedict[key]
- data = _prepare_hdf5_write_value(value)
- h5f[h5name].attrs[attr_name] = data
+
+def _has_nx_class(treedict, key=""):
+ return key + "@NX_class" in treedict or \
+ (key, "NX_class") in treedict
+
+
+def _ensure_nx_class(treedict, parents=tuple()):
+ """Each group needs an "NX_class" attribute.
+ """
+ if _has_nx_class(treedict):
+ return
+ nparents = len(parents)
+ if nparents == 0:
+ treedict[("", "NX_class")] = "NXroot"
+ elif nparents == 1:
+ treedict[("", "NX_class")] = "NXentry"
+ else:
+ treedict[("", "NX_class")] = "NXcollection"
-def nexus_to_h5_dict(treedict, parents=tuple()):
+def nexus_to_h5_dict(
+ treedict, parents=tuple(), add_nx_class=True, has_nx_class=False
+):
"""The following conversions are applied:
* key with "{name}@{attr_name}" notation: key converted to 2-tuple
* key with ">{url}" notation: strip ">" and convert value to
@@ -286,14 +417,20 @@ def nexus_to_h5_dict(treedict, parents=tuple()):
to define sub tree. The ``"@"`` character is used to write attributes.
The ``">"`` prefix is used to define links.
:param parents: Needed to resolve up-links (tuple of HDF5 group names)
+ :param add_nx_class: Add "NX_class" attribute when missing
+ :param has_nx_class: The "NX_class" attribute is defined in the parent
:rtype dict:
"""
+ if not isinstance(treedict, Mapping):
+ raise TypeError("'treedict' must be a dictionary")
copy = dict()
for key, value in treedict.items():
if "@" in key:
+ # HDF5 attribute
key = tuple(key.rsplit("@", 1))
elif key.startswith(">"):
+ # HDF5 link
if isinstance(value, str):
key = key[1:]
first, sep, second = value.partition("::")
@@ -314,10 +451,19 @@ def nexus_to_h5_dict(treedict, parents=tuple()):
value = h5py.SoftLink(first)
elif is_link(value):
key = key[1:]
- if isinstance(value, dict):
- copy[key] = nexus_to_h5_dict(value, parents=parents+(key,))
+ if isinstance(value, Mapping):
+ # HDF5 group
+ key_has_nx_class = add_nx_class and _has_nx_class(treedict, key)
+ copy[key] = nexus_to_h5_dict(
+ value,
+ parents=parents+(key,),
+ add_nx_class=add_nx_class,
+ has_nx_class=key_has_nx_class)
else:
+ # HDF5 dataset or link
copy[key] = value
+ if add_nx_class and not has_nx_class:
+ _ensure_nx_class(copy, parents)
return copy
@@ -336,7 +482,8 @@ def h5_to_nexus_dict(treedict):
copy = dict()
for key, value in treedict.items():
if isinstance(key, tuple):
- assert len(key)==2, "attribute must be defined by 2 values"
+ if len(key) != 2:
+ raise ValueError("HDF5 attribute must be described by 2 values")
key = "%s@%s" % (key[0], key[1])
elif is_softlink(value):
key = ">" + key
@@ -344,7 +491,7 @@ def h5_to_nexus_dict(treedict):
elif is_externallink(value):
key = ">" + key
value = value.filename + "::" + value.path
- if isinstance(value, dict):
+ if isinstance(value, Mapping):
copy[key] = h5_to_nexus_dict(value)
else:
copy[key] = value
@@ -414,10 +561,8 @@ def h5todict(h5file,
scalars). In some cases, you may find that a list of heterogeneous
data types is converted to a numpy array of strings.
- :param h5file: File name or :class:`h5py.File` object or spech5 file or
- fabioh5 file.
- :param str path: Name of HDF5 group to use as dictionary root level,
- to read only a sub-group in the file
+ :param h5file: File name or h5py-like File, Group or Dataset
+ :param str path: Target path in the HDF5 file relative to ``h5file``
:param List[str] exclude_names: Groups and datasets whose name contains
a string in this list will be ignored. Default is None (ignore nothing)
:param bool asarray: True (default) to read scalar as arrays, False to
@@ -431,6 +576,7 @@ def h5todict(h5file,
- 'ignore': Ignore errors
:return: Nested dictionary
"""
+ h5file, path = _normalize_h5_path(h5file, path)
with _SafeH5FileRead(h5file) as h5f:
ddict = {}
if path not in h5f:
@@ -508,7 +654,7 @@ def h5todict(h5file,
return ddict
-def dicttonx(treedict, h5file, h5path="/", **kw):
+def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw):
"""
Write a nested dictionary to a HDF5 file, using string keys as member names.
The NeXus convention is used to identify attributes with ``"@"`` character,
@@ -521,6 +667,8 @@ def dicttonx(treedict, h5file, h5path="/", **kw):
and array-like objects as leafs. The ``"/"`` character can be used
to define sub tree. The ``"@"`` character is used to write attributes.
The ``">"`` prefix is used to define links.
+ :param add_nx_class: Add "NX_class" attribute when missing. By default it
+ is ``True`` when ``update_mode`` is ``"add"`` or ``None``.
The named parameters are passed to dicttoh5.
@@ -557,12 +705,17 @@ def dicttonx(treedict, h5file, h5path="/", **kw):
dicttonx(gauss,"test.h5")
"""
+ h5file, h5path = _normalize_h5_path(h5file, h5path)
parents = tuple(p for p in h5path.split("/") if p)
- nxtreedict = nexus_to_h5_dict(treedict, parents=parents)
+ if add_nx_class is None:
+ add_nx_class = kw.get("update_mode", None) in (None, "add")
+ nxtreedict = nexus_to_h5_dict(
+ treedict, parents=parents, add_nx_class=add_nx_class
+ )
dicttoh5(nxtreedict, h5file, h5path=h5path, **kw)
-def nxtodict(h5file, **kw):
+def nxtodict(h5file, include_attributes=True, **kw):
"""Read a HDF5 file and return a nested dictionary with the complete file
structure and all data.
@@ -571,7 +724,7 @@ def nxtodict(h5file, **kw):
The named parameters are passed to h5todict.
"""
- nxtreedict = h5todict(h5file, **kw)
+ nxtreedict = h5todict(h5file, include_attributes=include_attributes, **kw)
return h5_to_nexus_dict(nxtreedict)