summaryrefslogtreecommitdiff
path: root/silx/io/dictdump.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/io/dictdump.py')
-rw-r--r--silx/io/dictdump.py125
1 files changed, 118 insertions, 7 deletions
diff --git a/silx/io/dictdump.py b/silx/io/dictdump.py
index da1bc5c..f2318e0 100644
--- a/silx/io/dictdump.py
+++ b/silx/io/dictdump.py
@@ -1,6 +1,6 @@
# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2019 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2020 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -154,14 +154,19 @@ def dicttoh5(treedict, h5file, h5path='/',
any other data type, it is cast into a numpy array and written as a
:mod:`h5py` dataset. Dictionary keys must be strings and cannot contain
the ``/`` character.
+
+ If dictionary keys are tuples they are interpreted to set h5 attributes.
+ The tuples should have the format (dataset_name, attr_name).
.. note::
This function requires `h5py <http://www.h5py.org/>`_ to be installed.
- :param treedict: Nested dictionary/tree structure with strings as keys
- and array-like objects as leafs. The ``"/"`` character is not allowed
- in keys.
+ :param treedict: Nested dictionary/tree structure with strings or tuples as
+ keys and array-like objects as leafs. The ``"/"`` character can be used
+ to define sub trees. If tuples are used as keys they should have the
+ format (dataset_name,attr_name) and will add an h5 attribute with the
+ corresponding value.
:param h5file: HDF5 file name or handle. If a file name is provided, the
function opens the file in the specified mode and closes it again
before completing.
@@ -186,10 +191,12 @@ def dicttoh5(treedict, h5file, h5path='/',
"Europe": {
"France": {
"Isère": {
- "Grenoble": "18.44 km2"
+ "Grenoble": 18.44,
+ ("Grenoble","unit"): "km2"
},
"Nord": {
- "Tourcoing": "15.19 km2"
+ "Tourcoing": 15.19,
+ ("Tourcoing","unit"): "km2"
},
},
},
@@ -207,7 +214,11 @@ def dicttoh5(treedict, h5file, h5path='/',
h5path += "/"
with _SafeH5FileWrite(h5file, mode=mode) as h5f:
- for key in treedict:
+ if isinstance(treedict, dict) and h5path != "/":
+ if h5path not in h5f:
+ h5f.create_group(h5path)
+
+ for key in filter(lambda k: not isinstance(k, tuple), treedict):
if isinstance(treedict[key], dict) and len(treedict[key]):
# non-empty group: recurse
dicttoh5(treedict[key], h5f, h5path + key,
@@ -253,6 +264,106 @@ def dicttoh5(treedict, h5file, h5path='/',
data=ds,
**create_dataset_args)
+ # deal with h5 attributes which have tuples as keys in treedict
+ for key in filter(lambda k: isinstance(k, tuple), treedict):
+ if (h5path + key[0]) not in h5f:
+ # Create empty group if key for attr does not exist
+ h5f.create_group(h5path + key[0])
+ logger.warning(
+ "key (%s) does not exist. attr %s "
+ "will be written to ." % (h5path + key[0], key[1])
+ )
+
+ if key[1] in h5f[h5path + key[0]].attrs:
+ if not overwrite_data:
+ logger.warning(
+ "attribute %s@%s already exists. Not overwriting."
+ "" % (h5path + key[0], key[1])
+ )
+ continue
+
+ # Write attribute
+ value = treedict[key]
+
+ # Encode a list/tuple of str as a vlen unicode array.
+ # Workaround for h5py<2.9.0 (e.g. debian 10).
+ if (isinstance(value, (list, tuple)) and
+ numpy.asarray(value).dtype.type == numpy.unicode_):
+ value = numpy.array(value, dtype=h5py.special_dtype(vlen=str))
+
+ h5f[h5path + key[0]].attrs[key[1]] = value
+
+
+def dicttonx(
+ treedict,
+ h5file,
+ h5path="/",
+ mode="w",
+ overwrite_data=False,
+ create_dataset_args=None,
+):
+ """
+ Write a nested dictionary to a HDF5 file, using string keys as member names.
+ The NeXus convention is used to identify attributes with ``"@"`` character,
+ therefore the dataset_names should not contain ``"@"``.
+
+ :param treedict: Nested dictionary/tree structure with strings as keys
+ and array-like objects as leafs. The ``"/"`` character can be used
+ to define sub trees. The ``"@"`` character is used to write attributes.
+
+ Details on all other params can be found in doc of dicttoh5.
+
+ Example::
+
+ import numpy
+ from silx.io.dictdump import dicttonx
+
+ gauss = {
+ "entry":{
+ "title":u"A plot of a gaussian",
+ "plot": {
+ "y": numpy.array([0.08, 0.19, 0.39, 0.66, 0.9, 1.,
+ 0.9, 0.66, 0.39, 0.19, 0.08]),
+ "x": numpy.arange(0,1.1,.1),
+ "@signal": "y",
+ "@axes": "x",
+ "@NX_class":u"NXdata",
+ "title": u"Gauss Plot",
+ },
+ "@NX_class":u"NXentry",
+ "default":"plot",
+ },
+ "@NX_class": u"NXroot",
+ "@default": "entry",
+ }
+
+ dicttonx(gauss,"test.h5")
+ """
+
+ def copy_keys_keep_values(original):
+ # create a new treedict with modified keys but keep values
+ copy = dict()
+ for key, value in original.items():
+ if "@" in key:
+ newkey = tuple(key.rsplit("@", 1))
+ else:
+ newkey = key
+ if isinstance(value, dict):
+ copy[newkey] = copy_keys_keep_values(value)
+ else:
+ copy[newkey] = value
+ return copy
+
+ nxtreedict = copy_keys_keep_values(treedict)
+ dicttoh5(
+ nxtreedict,
+ h5file,
+ h5path=h5path,
+ mode=mode,
+ overwrite_data=overwrite_data,
+ create_dataset_args=create_dataset_args,
+ )
+
def _name_contains_string_in_list(name, strlist):
if strlist is None: