summaryrefslogtreecommitdiff
path: root/silx/io/dictdump.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/io/dictdump.py')
-rw-r--r--silx/io/dictdump.py366
1 files changed, 366 insertions, 0 deletions
diff --git a/silx/io/dictdump.py b/silx/io/dictdump.py
new file mode 100644
index 0000000..ae7a457
--- /dev/null
+++ b/silx/io/dictdump.py
@@ -0,0 +1,366 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""This module offers a set of functions to dump a python dictionary indexed
+by text strings to following file formats: `HDF5, INI, JSON`
+"""
+
+from collections import OrderedDict
+import json
+import logging
+import numpy
+import os.path
+import sys
+
+try:
+ import h5py
+except ImportError as e:
+ h5py_missing = True
+ h5py_import_error = e
+else:
+ h5py_missing = False
+
+from .configdict import ConfigDict
+from .utils import is_group, is_file
+
+from silx.io import open as h5open
+
+__authors__ = ["P. Knobel"]
+__license__ = "MIT"
+__date__ = "10/02/2017"
+
+logger = logging.getLogger(__name__)
+
+string_types = (basestring,) if sys.version_info[0] == 2 else (str,) # noqa
+
+
+def _prepare_hdf5_dataset(array_like):
+ """Cast a python object into a numpy array in a HDF5 friendly format.
+
+ :param array_like: Input dataset in a type that can be digested by
+ ``numpy.array()`` (`str`, `list`, `numpy.ndarray`…)
+ :return: ``numpy.ndarray`` ready to be written as an HDF5 dataset
+ """
+ # simple strings
+ if isinstance(array_like, string_types):
+ array_like = numpy.string_(array_like)
+
+ # Ensure our data is a numpy.ndarray
+ if not isinstance(array_like, (numpy.ndarray, numpy.string_)):
+ array = numpy.array(array_like)
+ else:
+ array = array_like
+
+ # handle list of strings or numpy array of strings
+ if not isinstance(array, numpy.string_):
+ data_kind = array.dtype.kind
+ # unicode: convert to byte strings
+ # (http://docs.h5py.org/en/latest/strings.html)
+ if data_kind.lower() in ["s", "u"]:
+ array = numpy.asarray(array, dtype=numpy.string_)
+
+ return array
+
+
+def dicttoh5(treedict, h5file, h5path='/',
+ mode="w", overwrite_data=False,
+ create_dataset_args=None):
+ """Write a nested dictionary to a HDF5 file, using keys as member names.
+
+ If a dictionary value is a sub-dictionary, a group is created. If it is
+ any other data type, it is cast into a numpy array and written as a
+ :mod:`h5py` dataset. Dictionary keys must be strings and cannot contain
+ the ``/`` character.
+
+ .. note::
+
+ This function requires `h5py <http://www.h5py.org/>`_ to be installed.
+
+ :param treedict: Nested dictionary/tree structure with strings as keys
+ and array-like objects as leafs. The ``"/"`` character is not allowed
+ in keys.
+ :param h5file: HDF5 file name or handle. If a file name is provided, the
+ function opens the file in the specified mode and closes it again
+ before completing.
+ :param h5path: Target path in HDF5 file in which scan groups are created.
+ Default is root (``"/"``)
+ :param mode: Can be ``"r+"`` (read/write, file must exist),
+ ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
+ exists) or ``"a"`` (read/write if exists, create otherwise).
+ This parameter is ignored if ``h5file`` is a file handle.
+ :param overwrite_data: If ``True``, existing groups and datasets can be
+ overwritten, if ``False`` they are skipped. This parameter is only
+ relevant if ``h5file_mode`` is ``"r+"`` or ``"a"``.
+ :param create_dataset_args: Dictionary of args you want to pass to
+ ``h5f.create_dataset``. This allows you to specify filters and
+ compression parameters. Don't specify ``name`` and ``data``.
+
+ Example::
+
+ from silx.io.dictdump import dicttoh5
+
+ city_area = {
+ "Europe": {
+ "France": {
+ "Isère": {
+ "Grenoble": "18.44 km2"
+ },
+ "Nord": {
+ "Tourcoing": "15.19 km2"
+ },
+ },
+ },
+ }
+
+ create_ds_args = {'compression': "gzip",
+ 'shuffle': True,
+ 'fletcher32': True}
+
+ dicttoh5(city_area, "cities.h5", h5path="/area",
+ create_dataset_args=create_ds_args)
+ """
+ if h5py_missing:
+ raise h5py_import_error
+
+ if not isinstance(h5file, h5py.File):
+ h5f = h5py.File(h5file, mode)
+ else:
+ h5f = h5file
+
+ if not h5path.endswith("/"):
+ h5path += "/"
+
+ for key in treedict:
+
+ if isinstance(treedict[key], dict) and len(treedict[key]):
+ # non-empty group: recurse
+ dicttoh5(treedict[key], h5f, h5path + key,
+ overwrite_data=overwrite_data,
+ create_dataset_args=create_dataset_args)
+
+ elif treedict[key] is None or (isinstance(treedict[key], dict) and
+ not len(treedict[key])):
+ # Create empty group
+ h5f.create_group(h5path + key)
+
+ else:
+ ds = _prepare_hdf5_dataset(treedict[key])
+ # can't apply filters on scalars (datasets with shape == () )
+ if ds.shape == () or create_dataset_args is None:
+ h5f.create_dataset(h5path + key,
+ data=ds)
+ else:
+ h5f.create_dataset(h5path + key,
+ data=ds,
+ **create_dataset_args)
+
+ if isinstance(h5file, string_types):
+ h5f.close()
+
+
+def _name_contains_string_in_list(name, strlist):
+ if strlist is None:
+ return False
+ for filter_str in strlist:
+ if filter_str in name:
+ return True
+ return False
+
+
+def h5todict(h5file, path="/", exclude_names=None):
+ """Read a HDF5 file and return a nested dictionary with the complete file
+ structure and all data.
+
+ Example of usage::
+
+ from silx.io.dictdump import h5todict
+
+ # initialize dict with file header and scan header
+ header94 = h5todict("oleg.dat",
+ "/94.1/instrument/specfile")
+ # add positioners subdict
+ header94["positioners"] = h5todict("oleg.dat",
+ "/94.1/instrument/positioners")
+ # add scan data without mca data
+ header94["detector data"] = h5todict("oleg.dat",
+ "/94.1/measurement",
+ exclude_names="mca_")
+
+
+ .. note:: This function requires `h5py <http://www.h5py.org/>`_ to be
+ installed.
+
+ .. note:: If you write a dictionary to a HDF5 file with
+ :func:`dicttoh5` and then read it back with :func:`h5todict`, data
+ types are not preserved. All values are cast to numpy arrays before
+ being written to file, and they are read back as numpy arrays (or
+ scalars). In some cases, you may find that a list of heterogeneous
+ data types is converted to a numpy array of strings.
+
+ :param h5file: File name or :class:`h5py.File` object or spech5 file or
+ fabioh5 file.
+ :param str path: Name of HDF5 group to use as dictionary root level,
+ to read only a sub-group in the file
+ :param list[str] exclude_names: Groups and datasets whose name contains
+ a string in this list will be ignored. Default is None (ignore nothing)
+ :return: Nested dictionary
+ """
+ if h5py_missing:
+ raise h5py_import_error
+
+ if not is_file(h5file):
+ h5f = h5open(h5file)
+ else:
+ h5f = h5file
+
+ ddict = {}
+ for key in h5f[path]:
+ if _name_contains_string_in_list(key, exclude_names):
+ continue
+ if is_group(h5f[path + "/" + key]):
+ ddict[key] = h5todict(h5f,
+ path + "/" + key,
+ exclude_names=exclude_names)
+ else:
+ # Convert HDF5 dataset to numpy array
+ ddict[key] = h5f[path + "/" + key][...]
+
+ if not is_file(h5file):
+ # close file, if we opened it
+ h5f.close()
+
+ return ddict
+
+
+def dicttojson(ddict, jsonfile, indent=None, mode="w"):
+ """Serialize ``ddict`` as a JSON formatted stream to ``jsonfile``.
+
+ :param ddict: Dictionary (or any object compatible with ``json.dump``).
+ :param jsonfile: JSON file name or file-like object.
+ If a file name is provided, the function opens the file in the
+ specified mode and closes it again.
+ :param indent: If indent is a non-negative integer, then JSON array
+ elements and object members will be pretty-printed with that indent
+ level. An indent level of ``0`` will only insert newlines.
+ ``None`` (the default) selects the most compact representation.
+ :param mode: File opening mode (``w``, ``a``, ``w+``…)
+ """
+ if not hasattr(jsonfile, "write"):
+ jsonf = open(jsonfile, mode)
+ else:
+ jsonf = jsonfile
+
+ json.dump(ddict, jsonf, indent=indent)
+
+ if not hasattr(jsonfile, "write"):
+ jsonf.close()
+
+
+def dicttoini(ddict, inifile, mode="w"):
+ """Output dict as configuration file (similar to Microsoft Windows INI).
+
+ :param dict: Dictionary of configuration parameters
+ :param inifile: INI file name or file-like object.
+ If a file name is provided, the function opens the file in the
+ specified mode and closes it again.
+ :param mode: File opening mode (``w``, ``a``, ``w+``…)
+ """
+ if not hasattr(inifile, "write"):
+ inif = open(inifile, mode)
+ else:
+ inif = inifile
+
+ ConfigDict(initdict=ddict).write(inif)
+
+ if not hasattr(inifile, "write"):
+ inif.close()
+
+
+def dump(ddict, ffile, mode="w", fmat=None):
+ """Dump dictionary to a file
+
+ :param ddict: Dictionary with string keys
+ :param ffile: File name or file-like object with a ``write`` method
+ :param str fmat: Output format: ``"json"``, ``"hdf5"`` or ``"ini"``.
+ When None (the default), it uses the filename extension as the format.
+ Dumping to a HDF5 file requires `h5py <http://www.h5py.org/>`_ to be
+ installed.
+ :param str mode: File opening mode (``w``, ``a``, ``w+``…)
+ Default is *"w"*, write mode, overwrite if exists.
+ :raises IOError: if file format is not supported
+ """
+ if fmat is None:
+ # If file-like object get its name, else use ffile as filename
+ filename = getattr(ffile, 'name', ffile)
+ fmat = os.path.splitext(filename)[1][1:] # Strip extension leading '.'
+ fmat = fmat.lower()
+
+ if fmat == "json":
+ dicttojson(ddict, ffile, indent=2, mode=mode)
+ elif fmat in ["hdf5", "h5"]:
+ if h5py_missing:
+ logger.error("Cannot dump to HDF5 format, missing h5py library")
+ raise h5py_import_error
+ dicttoh5(ddict, ffile, mode=mode)
+ elif fmat in ["ini", "cfg"]:
+ dicttoini(ddict, ffile, mode=mode)
+ else:
+ raise IOError("Unknown format " + fmat)
+
+
+def load(ffile, fmat=None):
+ """Load dictionary from a file
+
+ When loading from a JSON or INI file, an OrderedDict is returned to
+ preserve the values' insertion order.
+
+ :param ffile: File name or file-like object with a ``read`` method
+ :param fmat: Input format: ``json``, ``hdf5`` or ``ini``.
+ When None (the default), it uses the filename extension as the format.
+ Loading from a HDF5 file requires `h5py <http://www.h5py.org/>`_ to be
+ installed.
+ :return: Dictionary (ordered dictionary for JSON and INI)
+ :raises IOError: if file format is not supported
+ """
+ if not hasattr(ffile, "read"):
+ f = open(ffile, "r")
+ fname = ffile
+ else:
+ f = ffile
+ fname = ffile.name
+
+ if fmat is None: # Use file extension as format
+ fmat = os.path.splitext(fname)[1][1:] # Strip extension leading '.'
+ fmat = fmat.lower()
+
+ if fmat == "json":
+ return json.load(f, object_pairs_hook=OrderedDict)
+ if fmat in ["hdf5", "h5"]:
+ if h5py_missing:
+ logger.error("Cannot load from HDF5 format, missing h5py library")
+ raise h5py_import_error
+ return h5todict(fname)
+ elif fmat in ["ini", "cfg"]:
+ return ConfigDict(filelist=[fname])
+ else:
+ raise IOError("Unknown format " + fmat)