diff options
Diffstat (limited to 'src/silx/io/dictdump.py')
-rw-r--r-- | src/silx/io/dictdump.py | 208 |
1 file changed, 115 insertions, 93 deletions
diff --git a/src/silx/io/dictdump.py b/src/silx/io/dictdump.py index a24de42..7722842 100644 --- a/src/silx/io/dictdump.py +++ b/src/silx/io/dictdump.py @@ -1,6 +1,5 @@ -# coding: utf-8 # /*########################################################################## -# Copyright (C) 2016-2020 European Synchrotron Radiation Facility +# Copyright (C) 2016-2023 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,15 +24,21 @@ by text strings to following file formats: `HDF5, INI, JSON` """ -from collections import OrderedDict from collections.abc import Mapping import json import logging import numpy import os.path -import sys import h5py +try: + from pint import Quantity as PintQuantity +except ImportError: + try: + from pint.quantity import Quantity as PintQuantity + except ImportError: + PintQuantity = None + from .configdict import ConfigDict from .utils import is_group from .utils import is_dataset @@ -44,7 +49,6 @@ from .utils import is_file as is_h5_file_like from .utils import open as h5open from .utils import h5py_read_dataset from .utils import H5pyAttributesReadWrapper -from silx.utils.deprecation import deprecated_warning __authors__ = ["P. Knobel"] __license__ = "MIT" @@ -65,6 +69,8 @@ def _prepare_hdf5_write_value(array_like): ``numpy.array()`` (`str`, `list`, `numpy.ndarray`…) :return: ``numpy.ndarray`` ready to be written as an HDF5 dataset """ + if PintQuantity is not None and isinstance(array_like, PintQuantity): + return numpy.array(array_like.magnitude) array = numpy.asarray(array_like) if numpy.issubdtype(array.dtype, numpy.bytes_): return numpy.array(array_like, dtype=vlen_bytes) @@ -88,6 +94,7 @@ class _SafeH5FileWrite: function. The object is created in the initial call if a path is provided, and it is closed only at the end when all the processing is finished. 
""" + def __init__(self, h5file, mode="w"): """ :param h5file: HDF5 file path or :class:`h5py.File` instance @@ -121,6 +128,7 @@ class _SafeH5FileRead: that SPEC files and all formats supported by fabio can also be opened, but in read-only mode. """ + def __init__(self, h5file): """ @@ -170,9 +178,14 @@ def _normalize_h5_path(h5root, h5path): return h5file, h5path -def dicttoh5(treedict, h5file, h5path='/', - mode="w", overwrite_data=None, - create_dataset_args=None, update_mode=None): +def dicttoh5( + treedict, + h5file, + h5path="/", + mode="w", + create_dataset_args=None, + update_mode=None, +): """Write a nested dictionary to a HDF5 file, using keys as member names. If a dictionary value is a sub-dictionary, a group is created. If it is @@ -202,9 +215,6 @@ def dicttoh5(treedict, h5file, h5path='/', ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if exists) or ``"a"`` (read/write if exists, create otherwise). This parameter is ignored if ``h5file`` is a file handle. - :param overwrite_data: Deprecated. ``True`` is approximately equivalent - to ``update_mode="modify"`` and ``False`` is equivalent to - ``update_mode="add"``. :param create_dataset_args: Dictionary of args you want to pass to ``h5f.create_dataset``. This allows you to specify filters and compression parameters. Don't specify ``name`` and ``data``. 
@@ -246,32 +256,16 @@ def dicttoh5(treedict, h5file, h5path='/', create_dataset_args=create_ds_args) """ - if overwrite_data is not None: - reason = ( - "`overwrite_data=True` becomes `update_mode='modify'` and " - "`overwrite_data=False` becomes `update_mode='add'`" - ) - deprecated_warning( - type_="argument", - name="overwrite_data", - reason=reason, - replacement="update_mode", - since_version="0.15", - ) - if update_mode is None: - if overwrite_data: - update_mode = "modify" - else: - update_mode = "add" - else: - if update_mode not in UPDATE_MODE_VALID_EXISTING_VALUES: - raise ValueError(( + update_mode = "add" + + if update_mode not in UPDATE_MODE_VALID_EXISTING_VALUES: + raise ValueError( + ( "Argument 'update_mode' can only have values: {}" "".format(UPDATE_MODE_VALID_EXISTING_VALUES) - )) - if overwrite_data is not None: - logger.warning("The argument `overwrite_data` is ignored") + ) + ) if not isinstance(treedict, Mapping): raise TypeError("'treedict' must be a dictionary") @@ -294,6 +288,9 @@ def dicttoh5(treedict, h5file, h5path='/', del h5f[h5path] h5f.create_group(h5path) else: + logger.info( + f'Cannot overwrite {h5f.file.filename}::{h5f[h5path].name} with update_mode="{update_mode}"' + ) return else: h5f.create_group(h5path) @@ -314,9 +311,13 @@ def dicttoh5(treedict, h5file, h5path='/', del h5f[h5name] exists = False if value: - dicttoh5(value, h5f, h5name, - update_mode=update_mode, - create_dataset_args=create_dataset_args) + dicttoh5( + value, + h5f, + h5name, + update_mode=update_mode, + create_dataset_args=create_dataset_args, + ) elif not exists: h5f.create_group(h5name) elif is_link(value): @@ -330,6 +331,9 @@ def dicttoh5(treedict, h5file, h5path='/', else: # HDF5 dataset if exists and not change_allowed: + logger.info( + f'Cannot modify dataset {h5f.file.filename}::{h5f[h5name].name} with update_mode="{update_mode}"' + ) continue data = _prepare_hdf5_write_value(value) @@ -343,19 +347,28 @@ def dicttoh5(treedict, h5file, h5path='/', # 
Delete the existing dataset if update_mode != "replace": if not is_dataset(h5f[h5name]): + logger.info( + f'Cannot overwrite {h5f.file.filename}::{h5f[h5name].name} with update_mode="{update_mode}"' + ) continue attrs_backup = dict(h5f[h5name].attrs) del h5f[h5name] # Create dataset # can't apply filters on scalars (datasets with shape == ()) - if data.shape == () or create_dataset_args is None: - h5f.create_dataset(h5name, - data=data) - else: - h5f.create_dataset(h5name, - data=data, - **create_dataset_args) + try: + if data.shape == () or create_dataset_args is None: + h5f.create_dataset(h5name, data=data) + else: + h5f.create_dataset(h5name, data=data, **create_dataset_args) + except Exception as e: + if isinstance(data, numpy.ndarray): + dtype = f"numpy.ndarray-{data.dtype}" + else: + dtype = type(data) + raise ValueError( + f"Failed to create dataset '{h5name}' with data ({dtype}) = {data}" + ) from e if attrs_backup: h5f[h5name].attrs.update(attrs_backup) @@ -381,19 +394,20 @@ def dicttoh5(treedict, h5file, h5path='/', else: # Add/modify HDF5 attribute if exists and not change_allowed: + logger.info( + f'Cannot modify attribute {h5f.file.filename}::{h5f[h5name].name}@{attr_name} with update_mode="{update_mode}"' + ) continue data = _prepare_hdf5_write_value(value) h5a[attr_name] = data def _has_nx_class(treedict, key=""): - return key + "@NX_class" in treedict or \ - (key, "NX_class") in treedict + return key + "@NX_class" in treedict or (key, "NX_class") in treedict def _ensure_nx_class(treedict, parents=tuple()): - """Each group needs an "NX_class" attribute. 
- """ + """Each group needs an "NX_class" attribute.""" if _has_nx_class(treedict): return nparents = len(parents) @@ -405,13 +419,11 @@ def _ensure_nx_class(treedict, parents=tuple()): treedict[("", "NX_class")] = "NXcollection" -def nexus_to_h5_dict( - treedict, parents=tuple(), add_nx_class=True, has_nx_class=False -): +def nexus_to_h5_dict(treedict, parents=tuple(), add_nx_class=True, has_nx_class=False): """The following conversions are applied: * key with "{name}@{attr_name}" notation: key converted to 2-tuple * key with ">{url}" notation: strip ">" and convert value to - h5py.SoftLink or h5py.ExternalLink + h5py.SoftLink or h5py.ExternalLink :param treedict: Nested dictionary/tree structure with strings as keys and array-like objects as leafs. The ``"/"`` character can be used @@ -452,17 +464,25 @@ def nexus_to_h5_dict( value = h5py.SoftLink(first) elif is_link(value): key = key[1:] + if isinstance(value, Mapping): # HDF5 group key_has_nx_class = add_nx_class and _has_nx_class(treedict, key) copy[key] = nexus_to_h5_dict( value, - parents=parents+(key,), + parents=parents + (key,), add_nx_class=add_nx_class, - has_nx_class=key_has_nx_class) + has_nx_class=key_has_nx_class, + ) + + elif PintQuantity is not None and isinstance(value, PintQuantity): + copy[key] = value.magnitude + copy[(key, "units")] = f"{value.units:~C}" + else: # HDF5 dataset or link copy[key] = value + if add_nx_class and not has_nx_class: _ensure_nx_class(copy, parents) return copy @@ -516,23 +536,25 @@ def _handle_error(mode: str, exception, msg: str, *args) -> None: :param str msg: Error message template :param List[str] args: Arguments for error message template """ - if mode == 'ignore': + if mode == "ignore": return # no-op - elif mode == 'log': + elif mode == "log": logger.error(msg, *args) - elif mode == 'raise': + elif mode == "raise": raise exception(msg % args) else: raise ValueError("Unsupported error handling: %s" % mode) -def h5todict(h5file, - path="/", - exclude_names=None, - 
asarray=True, - dereference_links=True, - include_attributes=False, - errors='raise'): +def h5todict( + h5file, + path="/", + exclude_names=None, + asarray=True, + dereference_links=True, + include_attributes=False, + errors="raise", +): """Read a HDF5 file and return a nested dictionary with the complete file structure and all data. @@ -581,20 +603,18 @@ def h5todict(h5file, with _SafeH5FileRead(h5file) as h5f: ddict = {} if path not in h5f: - _handle_error( - errors, KeyError, 'Path "%s" does not exist in file.', path) + _handle_error(errors, KeyError, 'Path "%s" does not exist in file.', path) return ddict try: root = h5f[path] except KeyError as e: if not isinstance(h5f.get(path, getlink=True), h5py.HardLink): - _handle_error(errors, - KeyError, - 'Cannot retrieve path "%s" (broken link)', - path) + _handle_error( + errors, KeyError, 'Cannot retrieve path "%s" (broken link)', path + ) else: - _handle_error(errors, KeyError, ', '.join(e.args)) + _handle_error(errors, KeyError, ", ".join(e.args)) return ddict # Read the attributes of the group @@ -618,31 +638,35 @@ def h5todict(h5file, h5obj = h5f[h5name] except KeyError as e: if not isinstance(h5f.get(h5name, getlink=True), h5py.HardLink): - _handle_error(errors, - KeyError, - 'Cannot retrieve path "%s" (broken link)', - h5name) + _handle_error( + errors, + KeyError, + 'Cannot retrieve path "%s" (broken link)', + h5name, + ) else: - _handle_error(errors, KeyError, ', '.join(e.args)) + _handle_error(errors, KeyError, ", ".join(e.args)) continue if is_group(h5obj): # Child is an HDF5 group - ddict[key] = h5todict(h5f, - h5name, - exclude_names=exclude_names, - asarray=asarray, - dereference_links=dereference_links, - include_attributes=include_attributes) + ddict[key] = h5todict( + h5f, + h5name, + exclude_names=exclude_names, + asarray=asarray, + dereference_links=dereference_links, + include_attributes=include_attributes, + errors=errors, + ) else: # Child is an HDF5 dataset try: data = h5py_read_dataset(h5obj) 
except OSError: - _handle_error(errors, - OSError, - 'Cannot retrieve dataset "%s"', - h5name) + _handle_error( + errors, OSError, 'Cannot retrieve dataset "%s"', h5name + ) else: if asarray: # Convert HDF5 dataset to numpy array data = numpy.array(data, copy=False) @@ -710,9 +734,7 @@ def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw): parents = tuple(p for p in h5path.split("/") if p) if add_nx_class is None: add_nx_class = kw.get("update_mode", None) in (None, "add") - nxtreedict = nexus_to_h5_dict( - treedict, parents=parents, add_nx_class=add_nx_class - ) + nxtreedict = nexus_to_h5_dict(treedict, parents=parents, add_nx_class=add_nx_class) dicttoh5(nxtreedict, h5file, h5path=h5path, **kw) @@ -788,7 +810,7 @@ def dump(ddict, ffile, mode="w", fmat=None): """ if fmat is None: # If file-like object get its name, else use ffile as filename - filename = getattr(ffile, 'name', ffile) + filename = getattr(ffile, "name", ffile) fmat = os.path.splitext(filename)[1][1:] # Strip extension leading '.' fmat = fmat.lower() @@ -805,7 +827,7 @@ def dump(ddict, ffile, mode="w", fmat=None): def load(ffile, fmat=None): """Load dictionary from a file - When loading from a JSON or INI file, an OrderedDict is returned to + When loading from a JSON or INI file, the returned dict preserves the      values' insertion order. :param ffile: File name or file-like object with a ``read`` method :param str fmat: File format: one in "json", "hdf5", "ini". When None (the default), it uses the filename extension as the format. Loading from a HDF5 file requires `h5py <http://www.h5py.org/>`_ to be installed. 
- :return: Dictionary (ordered dictionary for JSON and INI) + :return: Dictionary :raises IOError: if file format is not supported """ must_be_closed = False @@ -831,7 +853,7 @@ def load(ffile, fmat=None): fmat = fmat.lower() if fmat == "json": - return json.load(f, object_pairs_hook=OrderedDict) + return json.load(f) if fmat in ["hdf5", "h5"]: return h5todict(fname) elif fmat in ["ini", "cfg"]: |