diff options
Diffstat (limited to 'src/silx/io/dictdump.py')
-rw-r--r-- | src/silx/io/dictdump.py | 208 |
1 file changed, 115 insertions, 93 deletions
diff --git a/src/silx/io/dictdump.py b/src/silx/io/dictdump.py index a24de42..7722842 100644 --- a/src/silx/io/dictdump.py +++ b/src/silx/io/dictdump.py @@ -1,6 +1,5 @@ -# coding: utf-8 # /*########################################################################## -# Copyright (C) 2016-2020 European Synchrotron Radiation Facility +# Copyright (C) 2016-2023 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,15 +24,21 @@ by text strings to following file formats: `HDF5, INI, JSON` """ -from collections import OrderedDict from collections.abc import Mapping import json import logging import numpy import os.path -import sys import h5py +try: + from pint import Quantity as PintQuantity +except ImportError: + try: + from pint.quantity import Quantity as PintQuantity + except ImportError: + PintQuantity = None + from .configdict import ConfigDict from .utils import is_group from .utils import is_dataset @@ -44,7 +49,6 @@ from .utils import is_file as is_h5_file_like from .utils import open as h5open from .utils import h5py_read_dataset from .utils import H5pyAttributesReadWrapper -from silx.utils.deprecation import deprecated_warning __authors__ = ["P. Knobel"] __license__ = "MIT" @@ -65,6 +69,8 @@ def _prepare_hdf5_write_value(array_like): ``numpy.array()`` (`str`, `list`, `numpy.ndarray`…) :return: ``numpy.ndarray`` ready to be written as an HDF5 dataset """ + if PintQuantity is not None and isinstance(array_like, PintQuantity): + return numpy.array(array_like.magnitude) array = numpy.asarray(array_like) if numpy.issubdtype(array.dtype, numpy.bytes_): return numpy.array(array_like, dtype=vlen_bytes) @@ -88,6 +94,7 @@ class _SafeH5FileWrite: function. The object is created in the initial call if a path is provided, and it is closed only at the end when all the processing is finished. 
""" + def __init__(self, h5file, mode="w"): """ :param h5file: HDF5 file path or :class:`h5py.File` instance @@ -121,6 +128,7 @@ class _SafeH5FileRead: that SPEC files and all formats supported by fabio can also be opened, but in read-only mode. """ + def __init__(self, h5file): """ @@ -170,9 +178,14 @@ def _normalize_h5_path(h5root, h5path): return h5file, h5path -def dicttoh5(treedict, h5file, h5path='/', - mode="w", overwrite_data=None, - create_dataset_args=None, update_mode=None): +def dicttoh5( + treedict, + h5file, + h5path="/", + mode="w", + create_dataset_args=None, + update_mode=None, +): """Write a nested dictionary to a HDF5 file, using keys as member names. If a dictionary value is a sub-dictionary, a group is created. If it is @@ -202,9 +215,6 @@ def dicttoh5(treedict, h5file, h5path='/', ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if exists) or ``"a"`` (read/write if exists, create otherwise). This parameter is ignored if ``h5file`` is a file handle. - :param overwrite_data: Deprecated. ``True`` is approximately equivalent - to ``update_mode="modify"`` and ``False`` is equivalent to - ``update_mode="add"``. :param create_dataset_args: Dictionary of args you want to pass to ``h5f.create_dataset``. This allows you to specify filters and compression parameters. Don't specify ``name`` and ``data``. 
@@ -246,32 +256,16 @@ def dicttoh5(treedict, h5file, h5path='/', create_dataset_args=create_ds_args) """ - if overwrite_data is not None: - reason = ( - "`overwrite_data=True` becomes `update_mode='modify'` and " - "`overwrite_data=False` becomes `update_mode='add'`" - ) - deprecated_warning( - type_="argument", - name="overwrite_data", - reason=reason, - replacement="update_mode", - since_version="0.15", - ) - if update_mode is None: - if overwrite_data: - update_mode = "modify" - else: - update_mode = "add" - else: - if update_mode not in UPDATE_MODE_VALID_EXISTING_VALUES: - raise ValueError(( + update_mode = "add" + + if update_mode not in UPDATE_MODE_VALID_EXISTING_VALUES: + raise ValueError( + ( "Argument 'update_mode' can only have values: {}" "".format(UPDATE_MODE_VALID_EXISTING_VALUES) - )) - if overwrite_data is not None: - logger.warning("The argument `overwrite_data` is ignored") + ) + ) if not isinstance(treedict, Mapping): raise TypeError("'treedict' must be a dictionary") @@ -294,6 +288,9 @@ def dicttoh5(treedict, h5file, h5path='/', del h5f[h5path] h5f.create_group(h5path) else: + logger.info( + f'Cannot overwrite {h5f.file.filename}::{h5f[h5path].name} with update_mode="{update_mode}"' + ) return else: h5f.create_group(h5path) @@ -314,9 +311,13 @@ def dicttoh5(treedict, h5file, h5path='/', del h5f[h5name] exists = False if value: - dicttoh5(value, h5f, h5name, - update_mode=update_mode, - create_dataset_args=create_dataset_args) + dicttoh5( + value, + h5f, + h5name, + update_mode=update_mode, + create_dataset_args=create_dataset_args, + ) elif not exists: h5f.create_group(h5name) elif is_link(value): @@ -330,6 +331,9 @@ def dicttoh5(treedict, h5file, h5path='/', else: # HDF5 dataset if exists and not change_allowed: + logger.info( + f'Cannot modify dataset {h5f.file.filename}::{h5f[h5name].name} with update_mode="{update_mode}"' + ) continue data = _prepare_hdf5_write_value(value) @@ -343,19 +347,28 @@ def dicttoh5(treedict, h5file, h5path='/', # 
Delete the existing dataset if update_mode != "replace": if not is_dataset(h5f[h5name]): + logger.info( + f'Cannot overwrite {h5f.file.filename}::{h5f[h5name].name} with update_mode="{update_mode}"' + ) continue attrs_backup = dict(h5f[h5name].attrs) del h5f[h5name] # Create dataset # can't apply filters on scalars (datasets with shape == ()) - if data.shape == () or create_dataset_args is None: - h5f.create_dataset(h5name, - data=data) - else: - h5f.create_dataset(h5name, - data=data, - **create_dataset_args) + try: + if data.shape == () or create_dataset_args is None: + h5f.create_dataset(h5name, data=data) + else: + h5f.create_dataset(h5name, data=data, **create_dataset_args) + except Exception as e: + if isinstance(data, numpy.ndarray): + dtype = f"numpy.ndarray-{data.dtype}" + else: + dtype = type(data) + raise ValueError( + f"Failed to create dataset '{h5name}' with data ({dtype}) = {data}" + ) from e if attrs_backup: h5f[h5name].attrs.update(attrs_backup) @@ -381,19 +394,20 @@ def dicttoh5(treedict, h5file, h5path='/', else: # Add/modify HDF5 attribute if exists and not change_allowed: + logger.info( + f'Cannot modify attribute {h5f.file.filename}::{h5f[h5name].name}@{attr_name} with update_mode="{update_mode}"' + ) continue data = _prepare_hdf5_write_value(value) h5a[attr_name] = data def _has_nx_class(treedict, key=""): - return key + "@NX_class" in treedict or \ - (key, "NX_class") in treedict + return key + "@NX_class" in treedict or (key, "NX_class") in treedict def _ensure_nx_class(treedict, parents=tuple()): - """Each group needs an "NX_class" attribute. 
- """ + """Each group needs an "NX_class" attribute.""" if _has_nx_class(treedict): return nparents = len(parents) @@ -405,13 +419,11 @@ def _ensure_nx_class(treedict, parents=tuple()): treedict[("", "NX_class")] = "NXcollection" -def nexus_to_h5_dict( - treedict, parents=tuple(), add_nx_class=True, has_nx_class=False -): +def nexus_to_h5_dict(treedict, parents=tuple(), add_nx_class=True, has_nx_class=False): """The following conversions are applied: * key with "{name}@{attr_name}" notation: key converted to 2-tuple * key with ">{url}" notation: strip ">" and convert value to - h5py.SoftLink or h5py.ExternalLink + h5py.SoftLink or h5py.ExternalLink :param treedict: Nested dictionary/tree structure with strings as keys and array-like objects as leafs. The ``"/"`` character can be used @@ -452,17 +464,25 @@ def nexus_to_h5_dict( value = h5py.SoftLink(first) elif is_link(value): key = key[1:] + if isinstance(value, Mapping): # HDF5 group key_has_nx_class = add_nx_class and _has_nx_class(treedict, key) copy[key] = nexus_to_h5_dict( value, - parents=parents+(key,), + parents=parents + (key,), add_nx_class=add_nx_class, - has_nx_class=key_has_nx_class) + has_nx_class=key_has_nx_class, + ) + + elif PintQuantity is not None and isinstance(value, PintQuantity): + copy[key] = value.magnitude + copy[(key, "units")] = f"{value.units:~C}" + else: # HDF5 dataset or link copy[key] = value + if add_nx_class and not has_nx_class: _ensure_nx_class(copy, parents) return copy @@ -516,23 +536,25 @@ def _handle_error(mode: str, exception, msg: str, *args) -> None: :param str msg: Error message template :param List[str] args: Arguments for error message template """ - if mode == 'ignore': + if mode == "ignore": return # no-op - elif mode == 'log': + elif mode == "log": logger.error(msg, *args) - elif mode == 'raise': + elif mode == "raise": raise exception(msg % args) else: raise ValueError("Unsupported error handling: %s" % mode) -def h5todict(h5file, - path="/", - exclude_names=None, - 
asarray=True, - dereference_links=True, - include_attributes=False, - errors='raise'): +def h5todict( + h5file, + path="/", + exclude_names=None, + asarray=True, + dereference_links=True, + include_attributes=False, + errors="raise", +): """Read a HDF5 file and return a nested dictionary with the complete file structure and all data. @@ -581,20 +603,18 @@ def h5todict(h5file, with _SafeH5FileRead(h5file) as h5f: ddict = {} if path not in h5f: - _handle_error( - errors, KeyError, 'Path "%s" does not exist in file.', path) + _handle_error(errors, KeyError, 'Path "%s" does not exist in file.', path) return ddict try: root = h5f[path] except KeyError as e: if not isinstance(h5f.get(path, getlink=True), h5py.HardLink): - _handle_error(errors, - KeyError, - 'Cannot retrieve path "%s" (broken link)', - path) + _handle_error( + errors, KeyError, 'Cannot retrieve path "%s" (broken link)', path + ) else: - _handle_error(errors, KeyError, ', '.join(e.args)) + _handle_error(errors, KeyError, ", ".join(e.args)) return ddict # Read the attributes of the group @@ -618,31 +638,35 @@ def h5todict(h5file, h5obj = h5f[h5name] except KeyError as e: if not isinstance(h5f.get(h5name, getlink=True), h5py.HardLink): - _handle_error(errors, - KeyError, - 'Cannot retrieve path "%s" (broken link)', - h5name) + _handle_error( + errors, + KeyError, + 'Cannot retrieve path "%s" (broken link)', + h5name, + ) else: - _handle_error(errors, KeyError, ', '.join(e.args)) + _handle_error(errors, KeyError, ", ".join(e.args)) continue if is_group(h5obj): # Child is an HDF5 group - ddict[key] = h5todict(h5f, - h5name, - exclude_names=exclude_names, - asarray=asarray, - dereference_links=dereference_links, - include_attributes=include_attributes) + ddict[key] = h5todict( + h5f, + h5name, + exclude_names=exclude_names, + asarray=asarray, + dereference_links=dereference_links, + include_attributes=include_attributes, + errors=errors, + ) else: # Child is an HDF5 dataset try: data = h5py_read_dataset(h5obj) 
except OSError: - _handle_error(errors, - OSError, - 'Cannot retrieve dataset "%s"', - h5name) + _handle_error( + errors, OSError, 'Cannot retrieve dataset "%s"', h5name + ) else: if asarray: # Convert HDF5 dataset to numpy array data = numpy.array(data, copy=False) @@ -710,9 +734,7 @@ def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw): parents = tuple(p for p in h5path.split("/") if p) if add_nx_class is None: add_nx_class = kw.get("update_mode", None) in (None, "add") - nxtreedict = nexus_to_h5_dict( - treedict, parents=parents, add_nx_class=add_nx_class - ) + nxtreedict = nexus_to_h5_dict(treedict, parents=parents, add_nx_class=add_nx_class) dicttoh5(nxtreedict, h5file, h5path=h5path, **kw) @@ -788,7 +810,7 @@ def dump(ddict, ffile, mode="w", fmat=None): """ if fmat is None: # If file-like object get its name, else use ffile as filename - filename = getattr(ffile, 'name', ffile) + filename = getattr(ffile, "name", ffile) fmat = os.path.splitext(filename)[1][1:] # Strip extension leading '.' fmat = fmat.lower() @@ -805,7 +827,7 @@ def dump(ddict, ffile, mode="w", fmat=None): def load(ffile, fmat=None): """Load dictionary from a file - When loading from a JSON or INI file, an OrderedDict is returned to + When loading from a JSON or INI file, the returned dict preserves the      values' insertion order. :param ffile: File name or file-like object with a ``read`` method :param str fmat: File format: one in "json", "hdf5", "ini". When None (the default), it uses the filename extension as the format. Loading from a HDF5 file requires `h5py <http://www.h5py.org/>`_ to be installed. 
- :return: Dictionary (ordered dictionary for JSON and INI) + :return: Dictionary :raises IOError: if file format is not supported """ must_be_closed = False @@ -831,7 +853,7 @@ def load(ffile, fmat=None): fmat = fmat.lower() if fmat == "json": - return json.load(f, object_pairs_hook=OrderedDict) + return json.load(f) if fmat in ["hdf5", "h5"]: return h5todict(fname) elif fmat in ["ini", "cfg"]: |