Diffstat (limited to 'src/silx/io/dictdump.py')
-rw-r--r--  src/silx/io/dictdump.py | 208
1 file changed, 115 insertions(+), 93 deletions(-)
diff --git a/src/silx/io/dictdump.py b/src/silx/io/dictdump.py
index a24de42..7722842 100644
--- a/src/silx/io/dictdump.py
+++ b/src/silx/io/dictdump.py
@@ -1,6 +1,5 @@
-# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2020 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2023 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,15 +24,21 @@
by text strings to the following file formats: `HDF5, INI, JSON`
"""
-from collections import OrderedDict
from collections.abc import Mapping
import json
import logging
import numpy
import os.path
-import sys
import h5py
+try:
+ from pint import Quantity as PintQuantity
+except ImportError:
+ try:
+ from pint.quantity import Quantity as PintQuantity
+ except ImportError:
+ PintQuantity = None
+
from .configdict import ConfigDict
from .utils import is_group
from .utils import is_dataset
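pint has moved its public ``Quantity`` class between releases, so the guarded
import above tries the current top-level name first, falls back to the older
``pint.quantity`` module path, and finally settles on ``None`` when pint is not
installed. A minimal sketch of how code can branch on that sentinel (the
``magnitude_of`` helper is hypothetical, not part of silx):

    try:
        from pint import Quantity as PintQuantity  # newer pint layouts
    except ImportError:
        try:
            from pint.quantity import Quantity as PintQuantity  # older pint layouts
        except ImportError:
            PintQuantity = None  # pint absent: treat values as plain arrays

    def magnitude_of(value):
        """Return the bare numeric payload, stripping pint units when present."""
        if PintQuantity is not None and isinstance(value, PintQuantity):
            return value.magnitude
        return value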
@@ -44,7 +49,6 @@ from .utils import is_file as is_h5_file_like
from .utils import open as h5open
from .utils import h5py_read_dataset
from .utils import H5pyAttributesReadWrapper
-from silx.utils.deprecation import deprecated_warning
__authors__ = ["P. Knobel"]
__license__ = "MIT"
@@ -65,6 +69,8 @@ def _prepare_hdf5_write_value(array_like):
``numpy.array()`` (`str`, `list`, `numpy.ndarray`…)
:return: ``numpy.ndarray`` ready to be written as an HDF5 dataset
"""
+ if PintQuantity is not None and isinstance(array_like, PintQuantity):
+ return numpy.array(array_like.magnitude)
array = numpy.asarray(array_like)
if numpy.issubdtype(array.dtype, numpy.bytes_):
return numpy.array(array_like, dtype=vlen_bytes)
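The new first branch unwraps pint quantities before ``numpy.asarray`` runs,
since coercing a Quantity directly would drop or mangle its units depending on
the pint version. A hedged sketch of the observable behaviour (assumes pint and
h5py are installed; ``vlen_bytes`` mirrors the module-level variable-length
bytes dtype used above):

    import numpy
    import h5py
    import pint

    vlen_bytes = h5py.special_dtype(vlen=bytes)
    ureg = pint.UnitRegistry()

    q = 2.5 * ureg.meter
    print(numpy.array(q.magnitude))                      # array(2.5): unitless payload
    print(numpy.array([b"ab", b"c"], dtype=vlen_bytes))  # HDF5-friendly vlen bytes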
@@ -88,6 +94,7 @@ class _SafeH5FileWrite:
function. The object is created in the initial call if a path is provided,
and it is closed only at the end when all the processing is finished.
"""
+
def __init__(self, h5file, mode="w"):
"""
:param h5file: HDF5 file path or :class:`h5py.File` instance
@@ -121,6 +128,7 @@ class _SafeH5FileRead:
that SPEC files and all formats supported by fabio can also be opened,
but in read-only mode.
"""
+
def __init__(self, h5file):
"""
@@ -170,9 +178,14 @@ def _normalize_h5_path(h5root, h5path):
return h5file, h5path
-def dicttoh5(treedict, h5file, h5path='/',
- mode="w", overwrite_data=None,
- create_dataset_args=None, update_mode=None):
+def dicttoh5(
+ treedict,
+ h5file,
+ h5path="/",
+ mode="w",
+ create_dataset_args=None,
+ update_mode=None,
+):
"""Write a nested dictionary to a HDF5 file, using keys as member names.
If a dictionary value is a sub-dictionary, a group is created. If it is
@@ -202,9 +215,6 @@ def dicttoh5(treedict, h5file, h5path='/',
``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
exists) or ``"a"`` (read/write if exists, create otherwise).
This parameter is ignored if ``h5file`` is a file handle.
- :param overwrite_data: Deprecated. ``True`` is approximately equivalent
- to ``update_mode="modify"`` and ``False`` is equivalent to
- ``update_mode="add"``.
:param create_dataset_args: Dictionary of args you want to pass to
``h5f.create_dataset``. This allows you to specify filters and
compression parameters. Don't specify ``name`` and ``data``.
@@ -246,32 +256,16 @@ def dicttoh5(treedict, h5file, h5path='/',
create_dataset_args=create_ds_args)
"""
- if overwrite_data is not None:
- reason = (
- "`overwrite_data=True` becomes `update_mode='modify'` and "
- "`overwrite_data=False` becomes `update_mode='add'`"
- )
- deprecated_warning(
- type_="argument",
- name="overwrite_data",
- reason=reason,
- replacement="update_mode",
- since_version="0.15",
- )
-
if update_mode is None:
- if overwrite_data:
- update_mode = "modify"
- else:
- update_mode = "add"
- else:
- if update_mode not in UPDATE_MODE_VALID_EXISTING_VALUES:
- raise ValueError((
+ update_mode = "add"
+
+ if update_mode not in UPDATE_MODE_VALID_EXISTING_VALUES:
+ raise ValueError(
+ (
"Argument 'update_mode' can only have values: {}"
"".format(UPDATE_MODE_VALID_EXISTING_VALUES)
- ))
- if overwrite_data is not None:
- logger.warning("The argument `overwrite_data` is ignored")
+ )
+ )
if not isinstance(treedict, Mapping):
raise TypeError("'treedict' must be a dictionary")
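With the deprecation shim removed, callers still passing ``overwrite_data`` now
get a plain ``TypeError`` from Python rather than a deprecation warning. A
hedged migration sketch (``data.h5`` is a placeholder path):

    from silx.io.dictdump import dicttoh5

    treedict = {"group": {"dataset": [1, 2, 3]}}

    # Before: dicttoh5(treedict, "data.h5", mode="a", overwrite_data=True)
    dicttoh5(treedict, "data.h5", mode="a", update_mode="modify")

    # Before: dicttoh5(treedict, "data.h5", mode="a", overwrite_data=False)
    dicttoh5(treedict, "data.h5", mode="a", update_mode="add")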
@@ -294,6 +288,9 @@ def dicttoh5(treedict, h5file, h5path='/',
del h5f[h5path]
h5f.create_group(h5path)
else:
+ logger.info(
+ f'Cannot overwrite {h5f.file.filename}::{h5f[h5path].name} with update_mode="{update_mode}"'
+ )
return
else:
h5f.create_group(h5path)
@@ -314,9 +311,13 @@ def dicttoh5(treedict, h5file, h5path='/',
del h5f[h5name]
exists = False
if value:
- dicttoh5(value, h5f, h5name,
- update_mode=update_mode,
- create_dataset_args=create_dataset_args)
+ dicttoh5(
+ value,
+ h5f,
+ h5name,
+ update_mode=update_mode,
+ create_dataset_args=create_dataset_args,
+ )
elif not exists:
h5f.create_group(h5name)
elif is_link(value):
@@ -330,6 +331,9 @@ def dicttoh5(treedict, h5file, h5path='/',
else:
# HDF5 dataset
if exists and not change_allowed:
+ logger.info(
+ f'Cannot modify dataset {h5f.file.filename}::{h5f[h5name].name} with update_mode="{update_mode}"'
+ )
continue
data = _prepare_hdf5_write_value(value)
@@ -343,19 +347,28 @@ def dicttoh5(treedict, h5file, h5path='/',
# Delete the existing dataset
if update_mode != "replace":
if not is_dataset(h5f[h5name]):
+ logger.info(
+ f'Cannot overwrite {h5f.file.filename}::{h5f[h5name].name} with update_mode="{update_mode}"'
+ )
continue
attrs_backup = dict(h5f[h5name].attrs)
del h5f[h5name]
# Create dataset
# can't apply filters on scalars (datasets with shape == ())
- if data.shape == () or create_dataset_args is None:
- h5f.create_dataset(h5name,
- data=data)
- else:
- h5f.create_dataset(h5name,
- data=data,
- **create_dataset_args)
+ try:
+ if data.shape == () or create_dataset_args is None:
+ h5f.create_dataset(h5name, data=data)
+ else:
+ h5f.create_dataset(h5name, data=data, **create_dataset_args)
+ except Exception as e:
+ if isinstance(data, numpy.ndarray):
+ dtype = f"numpy.ndarray-{data.dtype}"
+ else:
+ dtype = type(data)
+ raise ValueError(
+ f"Failed to create dataset '{h5name}' with data ({dtype}) = {data}"
+ ) from e
if attrs_backup:
h5f[h5name].attrs.update(attrs_backup)
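The ``try``/``except`` added around ``create_dataset`` replaces h5py's often
opaque low-level errors with a ``ValueError`` naming the offending dataset and
its dtype. For example, a mixed-type list ends up as an object array that h5py
cannot serialise (an illustration under that assumption, not captured output):

    import numpy

    data = numpy.asarray([1, "a", None])  # dtype=object: h5py has no mapping for it
    print(data.dtype)                     # object
    # dicttoh5 would now report roughly:
    #   ValueError: Failed to create dataset 'x' with data (numpy.ndarray-object) = [1 'a' None]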
@@ -381,19 +394,20 @@ def dicttoh5(treedict, h5file, h5path='/',
else:
# Add/modify HDF5 attribute
if exists and not change_allowed:
+ logger.info(
+ f'Cannot modify attribute {h5f.file.filename}::{h5f[h5name].name}@{attr_name} with update_mode="{update_mode}"'
+ )
continue
data = _prepare_hdf5_write_value(value)
h5a[attr_name] = data
def _has_nx_class(treedict, key=""):
- return key + "@NX_class" in treedict or \
- (key, "NX_class") in treedict
+ return key + "@NX_class" in treedict or (key, "NX_class") in treedict
def _ensure_nx_class(treedict, parents=tuple()):
- """Each group needs an "NX_class" attribute.
- """
+ """Each group needs an "NX_class" attribute."""
if _has_nx_class(treedict):
return
nparents = len(parents)
@@ -405,13 +419,11 @@ def _ensure_nx_class(treedict, parents=tuple()):
treedict[("", "NX_class")] = "NXcollection"
-def nexus_to_h5_dict(
- treedict, parents=tuple(), add_nx_class=True, has_nx_class=False
-):
+def nexus_to_h5_dict(treedict, parents=tuple(), add_nx_class=True, has_nx_class=False):
"""The following conversions are applied:
* key with "{name}@{attr_name}" notation: key converted to 2-tuple
* key with ">{url}" notation: strip ">" and convert value to
- h5py.SoftLink or h5py.ExternalLink
+ h5py.SoftLink or h5py.ExternalLink
:param treedict: Nested dictionary/tree structure with strings as keys
and array-like objects as leafs. The ``"/"`` character can be used
@@ -452,17 +464,25 @@ def nexus_to_h5_dict(
value = h5py.SoftLink(first)
elif is_link(value):
key = key[1:]
+
if isinstance(value, Mapping):
# HDF5 group
key_has_nx_class = add_nx_class and _has_nx_class(treedict, key)
copy[key] = nexus_to_h5_dict(
value,
- parents=parents+(key,),
+ parents=parents + (key,),
add_nx_class=add_nx_class,
- has_nx_class=key_has_nx_class)
+ has_nx_class=key_has_nx_class,
+ )
+
+ elif PintQuantity is not None and isinstance(value, PintQuantity):
+ copy[key] = value.magnitude
+ copy[(key, "units")] = f"{value.units:~C}"
+
else:
# HDF5 dataset or link
copy[key] = value
+
if add_nx_class and not has_nx_class:
_ensure_nx_class(copy, parents)
return copy
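The new pint branch splits a Quantity into its bare magnitude (the future
dataset) and a short-form units string stored under the 2-tuple key
``(key, "units")``, which ``dicttoh5`` later writes as an HDF5 attribute. A
sketch of the conversion, assuming pint is installed:

    import pint
    from silx.io.dictdump import nexus_to_h5_dict

    ureg = pint.UnitRegistry()
    h5dict = nexus_to_h5_dict({"distance": 1.5 * ureg.millimeter}, add_nx_class=False)

    print(h5dict["distance"])             # 1.5
    print(h5dict[("distance", "units")])  # "mm" (pint's short "~C" formatting)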
@@ -516,23 +536,25 @@ def _handle_error(mode: str, exception, msg: str, *args) -> None:
:param str msg: Error message template
:param List[str] args: Arguments for error message template
"""
- if mode == 'ignore':
+ if mode == "ignore":
return # no-op
- elif mode == 'log':
+ elif mode == "log":
logger.error(msg, *args)
- elif mode == 'raise':
+ elif mode == "raise":
raise exception(msg % args)
else:
raise ValueError("Unsupported error handling: %s" % mode)
-def h5todict(h5file,
- path="/",
- exclude_names=None,
- asarray=True,
- dereference_links=True,
- include_attributes=False,
- errors='raise'):
+def h5todict(
+ h5file,
+ path="/",
+ exclude_names=None,
+ asarray=True,
+ dereference_links=True,
+ include_attributes=False,
+ errors="raise",
+):
"""Read a HDF5 file and return a nested dictionary with the complete file
structure and all data.
@@ -581,20 +603,18 @@ def h5todict(h5file,
with _SafeH5FileRead(h5file) as h5f:
ddict = {}
if path not in h5f:
- _handle_error(
- errors, KeyError, 'Path "%s" does not exist in file.', path)
+ _handle_error(errors, KeyError, 'Path "%s" does not exist in file.', path)
return ddict
try:
root = h5f[path]
except KeyError as e:
if not isinstance(h5f.get(path, getlink=True), h5py.HardLink):
- _handle_error(errors,
- KeyError,
- 'Cannot retrieve path "%s" (broken link)',
- path)
+ _handle_error(
+ errors, KeyError, 'Cannot retrieve path "%s" (broken link)', path
+ )
else:
- _handle_error(errors, KeyError, ', '.join(e.args))
+ _handle_error(errors, KeyError, ", ".join(e.args))
return ddict
# Read the attributes of the group
@@ -618,31 +638,35 @@ def h5todict(h5file,
h5obj = h5f[h5name]
except KeyError as e:
if not isinstance(h5f.get(h5name, getlink=True), h5py.HardLink):
- _handle_error(errors,
- KeyError,
- 'Cannot retrieve path "%s" (broken link)',
- h5name)
+ _handle_error(
+ errors,
+ KeyError,
+ 'Cannot retrieve path "%s" (broken link)',
+ h5name,
+ )
else:
- _handle_error(errors, KeyError, ', '.join(e.args))
+ _handle_error(errors, KeyError, ", ".join(e.args))
continue
if is_group(h5obj):
# Child is an HDF5 group
- ddict[key] = h5todict(h5f,
- h5name,
- exclude_names=exclude_names,
- asarray=asarray,
- dereference_links=dereference_links,
- include_attributes=include_attributes)
+ ddict[key] = h5todict(
+ h5f,
+ h5name,
+ exclude_names=exclude_names,
+ asarray=asarray,
+ dereference_links=dereference_links,
+ include_attributes=include_attributes,
+ errors=errors,
+ )
else:
# Child is an HDF5 dataset
try:
data = h5py_read_dataset(h5obj)
except OSError:
- _handle_error(errors,
- OSError,
- 'Cannot retrieve dataset "%s"',
- h5name)
+ _handle_error(
+ errors, OSError, 'Cannot retrieve dataset "%s"', h5name
+ )
else:
if asarray: # Convert HDF5 dataset to numpy array
data = numpy.array(data, copy=False)
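Note the recursive call now forwards ``errors=errors``: previously a
non-default error mode only applied at the top level, so a broken link or
unreadable dataset inside a sub-group would still raise. A usage sketch
(``data.h5`` is a placeholder path):

    from silx.io.dictdump import h5todict

    # Log unreadable items anywhere in the tree instead of raising on the first one:
    ddict = h5todict("data.h5", path="/", errors="log")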
@@ -710,9 +734,7 @@ def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw):
parents = tuple(p for p in h5path.split("/") if p)
if add_nx_class is None:
add_nx_class = kw.get("update_mode", None) in (None, "add")
- nxtreedict = nexus_to_h5_dict(
- treedict, parents=parents, add_nx_class=add_nx_class
- )
+ nxtreedict = nexus_to_h5_dict(treedict, parents=parents, add_nx_class=add_nx_class)
dicttoh5(nxtreedict, h5file, h5path=h5path, **kw)
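``dicttonx`` feeds the ``nexus_to_h5_dict`` output straight into ``dicttoh5``,
so the ``@`` attribute and ``>`` link notations apply, and pint quantities gain
a ``units`` attribute automatically. A minimal sketch (the file name is a
placeholder):

    from silx.io.dictdump import dicttonx

    treedict = {
        "entry": {
            "@NX_class": "NXentry",
            "data": [1.0, 2.0, 3.0],
            "data@units": "mm",
            ">alias": "data",  # written as an h5py.SoftLink to "data"
        },
    }
    dicttonx(treedict, "nexus.h5", h5path="/")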
@@ -788,7 +810,7 @@ def dump(ddict, ffile, mode="w", fmat=None):
"""
if fmat is None:
# If file-like object get its name, else use ffile as filename
- filename = getattr(ffile, 'name', ffile)
+ filename = getattr(ffile, "name", ffile)
fmat = os.path.splitext(filename)[1][1:] # Strip extension leading '.'
fmat = fmat.lower()
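``dump`` resolves the output format from the filename extension when ``fmat``
is omitted; the hunk above only normalises string quoting. A quick sketch
(file names are placeholders):

    from silx.io.dictdump import dump

    ddict = {"a": 1, "b": [1, 2, 3]}
    dump(ddict, "out.json")             # ".json" selects the JSON writer
    dump(ddict, "out.h5")               # ".h5" routes to dicttoh5
    dump(ddict, "out.cfg", fmat="ini")  # explicit fmat overrides the extension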
@@ -805,7 +827,7 @@ def dump(ddict, ffile, mode="w", fmat=None):
def load(ffile, fmat=None):
"""Load dictionary from a file
- When loading from a JSON or INI file, an OrderedDict is returned to
+ When loading from a JSON or INI file, the returned dict is guaranteed to
preserve the values' insertion order.
:param ffile: File name or file-like object with a ``read`` method
@@ -813,7 +835,7 @@ def load(ffile, fmat=None):
When None (the default), it uses the filename extension as the format.
Loading from a HDF5 file requires `h5py <http://www.h5py.org/>`_ to be
installed.
- :return: Dictionary (ordered dictionary for JSON and INI)
+ :return: Dictionary
:raises IOError: if file format is not supported
"""
must_be_closed = False
@@ -831,7 +853,7 @@ def load(ffile, fmat=None):
fmat = fmat.lower()
if fmat == "json":
- return json.load(f, object_pairs_hook=OrderedDict)
+ return json.load(f)
if fmat in ["hdf5", "h5"]:
return h5todict(fname)
elif fmat in ["ini", "cfg"]:
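Dropping ``object_pairs_hook=OrderedDict`` is safe because the built-in
``dict`` preserves insertion order on all Python versions silx still supports
(the guarantee is part of the language since 3.7), so JSON and INI loads keep
key order without the extra class. A small check:

    import json

    text = '{"b": 1, "a": 2}'
    assert list(json.loads(text)) == ["b", "a"]  # plain dict keeps insertion order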