diff options
Diffstat (limited to 'silx/io/nxdata.py')
-rw-r--r-- | silx/io/nxdata.py | 535 |
1 files changed, 535 insertions, 0 deletions
diff --git a/silx/io/nxdata.py b/silx/io/nxdata.py new file mode 100644 index 0000000..c0e53fc --- /dev/null +++ b/silx/io/nxdata.py @@ -0,0 +1,535 @@ +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""This module provides a collection of functions to work with h5py-like +groups following the NeXus *NXdata* specification. + +See http://download.nexusformat.org/sphinx/classes/base_classes/NXdata.html + +""" +import logging +import numpy +from .utils import is_dataset, is_group +from silx.third_party import six + +_logger = logging.getLogger(__name__) + + +_INTERPDIM = {"scalar": 0, + "spectrum": 1, + "image": 2, + # "rgba-image": 3, "hsla-image": 3, "cmyk-image": 3, # TODO + "vertex": 1} # 3D scatter: 1D signal + 3 axes (x, y, z) of same legth +"""Number of signal dimensions associated to each possible @interpretation +attribute. +""" + + +def _nxdata_warning(msg): + """Log a warning message prefixed with + *"NXdata warning: "* + + :param str msg: Warning message + """ + _logger.warning("NXdata warning: " + msg) + + +def _get_attr(item, attr_name, default=None): + """Return item.attrs[attr_name]. If it is a byte-string or an array of + byte-strings, return it as a default python string. + + For Python 3, this involves a coercion from bytes into unicode. + For Python 2, there is nothing special to do, as strings are bytes. + + :param item: Group or dataset + :param attr_name: Attribute name + :return: item.attrs[attr_name] + """ + attr = item.attrs.get(attr_name, default) + if six.PY2: + return attr + if six.PY3: + if hasattr(attr, "decode"): + # byte-string + return attr.decode("ascii") + elif isinstance(attr, numpy.ndarray) and hasattr(attr[0], "decode"): + # array of byte-strinqs + return [element.decode("ascii") for element in attr] + else: + # attr is not a byte-strinq + return attr + + +def is_valid_nxdata(group): # noqa + """Check if a h5py group is a **valid** NX_data group. + + If the group does not have attribute *@NX_class=NXdata*, this function + simply returns *False*. + + Else, warning messages are logged to troubleshoot malformed NXdata groups + prior to returning *False*. + + :param group: h5py-like group + :return: True if this NXdata group is valid. + :raise: TypeError if group is not a h5py group, a spech5 group, + or a fabioh5 group + """ + if not is_group(group): + raise TypeError("group must be a h5py-like group") + if _get_attr(group, "NX_class") != "NXdata": + return False + if "signal" not in group.attrs: + _logger.warning("NXdata group does not define a signal attr.") + return False + + signal_name = _get_attr(group, "signal") + if signal_name not in group or not is_dataset(group[signal_name]): + _logger.warning( + "Cannot find signal dataset '%s' in NXdata group" % signal_name) + return False + + ndim = len(group[signal_name].shape) + + if "axes" in group.attrs: + axes_names = _get_attr(group, "axes") + if isinstance(axes_names, str): + axes_names = [axes_names] + + if 1 < ndim < len(axes_names): + # ndim = 1 and several axes could be a scatter + _nxdata_warning( + "More @axes defined than there are " + + "signal dimensions: " + + "%d axes, %d dimensions." % (len(axes_names), ndim)) + return False + + # case of less axes than dimensions: number of axes must match + # dimensionality defined by @interpretation + if ndim > len(axes_names): + interpretation = _get_attr(group[signal_name], "interpretation", None) + if interpretation is None: + interpretation = _get_attr(group, "interpretation", None) + if interpretation is None: + _nxdata_warning("No @interpretation and not enough" + + " @axes defined.") + return False + + if interpretation not in _INTERPDIM: + _nxdata_warning("Unrecognized @interpretation=" + interpretation + + " for data with wrong number of defined @axes.") + return False + + if len(axes_names) != _INTERPDIM[interpretation]: + _nxdata_warning( + "%d-D signal with @interpretation=%s " % (ndim, interpretation) + + "must define %d or %d axes." % (ndim, _INTERPDIM[interpretation])) + return False + + # Test consistency of @uncertainties + uncertainties_names = _get_attr(group, "uncertainties") + if uncertainties_names is None: + uncertainties_names = _get_attr(group[signal_name], "uncertainties") + if isinstance(uncertainties_names, str): + uncertainties_names = [uncertainties_names] + if uncertainties_names is not None: + if len(uncertainties_names) != len(axes_names): + _nxdata_warning("@uncertainties does not define the same " + + "number of fields than @axes") + return False + + # Test individual axes + is_scatter = True # true if all axes have the same size as the signal + signal_size = 1 + for dim in group[signal_name].shape: + signal_size *= dim + polynomial_axes_names = [] + for i, axis_name in enumerate(axes_names): + if axis_name == ".": + continue + if axis_name not in group or not is_dataset(group[axis_name]): + _nxdata_warning("Could not find axis dataset '%s'" % axis_name) + return False + + axis_size = 1 + for dim in group[axis_name].shape: + axis_size *= dim + + if len(group[axis_name].shape) != 1: + # too me, it makes only sense to have a n-D axis if it's total + # size is exactly the signal's size (weird n-d scatter) + if axis_size != signal_size: + _nxdata_warning("Axis %s is not a 1D dataset" % axis_name + + " and its shape does not match the signal's shape") + return False + axis_len = axis_size + else: + # for a 1-d axis, + fg_idx = _get_attr(group[axis_name], "first_good", 0) + lg_idx = _get_attr(group[axis_name], "last_good", len(group[axis_name]) - 1) + axis_len = lg_idx + 1 - fg_idx + + if axis_len != signal_size: + if axis_len not in group[signal_name].shape + (1, 2): + _nxdata_warning( + "Axis %s number of elements does not " % axis_name + + "correspond to the length of any signal dimension," + " it does not appear to be a constant or a linear calibration," + + " and this does not seem to be a scatter plot.") + return False + elif axis_len in (1, 2): + polynomial_axes_names.append(axis_name) + is_scatter = False + else: + if not is_scatter: + _nxdata_warning( + "Axis %s number of elements is equal " % axis_name + + "to the length of the signal, but this does not seem" + + " to be a scatter (other axes have different sizes)") + return False + + # Test individual uncertainties + errors_name = axis_name + "_errors" + if errors_name not in group and uncertainties_names is not None: + errors_name = uncertainties_names[i] + if errors_name in group and axis_name not in polynomial_axes_names: + if group[errors_name].shape != group[axis_name].shape: + _nxdata_warning( + "Errors '%s' does not have the same " % errors_name + + "dimensions as axis '%s'." % axis_name) + return False + + # test dimensions of errors associated with signal + if "errors" in group and is_dataset(group["errors"]): + if group["errors"].shape != group[signal_name].shape: + _nxdata_warning("Dataset containing standard deviations must " + + "have the same dimensions as the signal.") + return False + return True + + +class NXdata(object): + """ + + :param group: h5py-like group following the NeXus *NXdata* specification. + """ + def __init__(self, group): + if not is_valid_nxdata(group): + raise TypeError("group is not a valid NXdata class") + super(NXdata, self).__init__() + + self._is_scatter = None + self._axes = None + + self.group = group + """h5py-like group object compliant with NeXus NXdata specification. + """ + + self.signal = self.group[self.group.attrs["signal"]] + """Signal dataset in this NXdata group. + """ + + # ndim will be available in very recent h5py versions only + self.signal_ndim = getattr(self.signal, "ndim", + len(self.signal.shape)) + + self.signal_is_0d = self.signal_ndim == 0 + self.signal_is_1d = self.signal_ndim == 1 + self.signal_is_2d = self.signal_ndim == 2 + self.signal_is_3d = self.signal_ndim == 3 + + self.axes_names = [] + """List of axes names in a NXdata group. + + This attribute is similar to :attr:`axes_dataset_names` except that + if an axis dataset has a "@long_name" attribute, it will be used + instead of the dataset name. + """ + # check if axis dataset defines @long_name + for i, dsname in enumerate(self.axes_dataset_names): + if dsname is not None and "long_name" in self.group[dsname].attrs: + self.axes_names.append(self.group[dsname].attrs["long_name"]) + else: + self.axes_names.append(dsname) + + # excludes scatters + self.signal_is_1d = self.signal_is_1d and len(self.axes) <= 1 # excludes n-D scatters + + @property + def interpretation(self): + """*@interpretation* attribute associated with the *signal* + dataset of the NXdata group. ``None`` if no interpretation + attribute is present. + + The *interpretation* attribute provides information about the last + dimensions of the signal. The allowed values are: + + - *"scalar"*: 0-D data to be plotted + - *"spectrum"*: 1-D data to be plotted + - *"image"*: 2-D data to be plotted + - *"vertex"*: 3-D data to be plotted + + For example, a 3-D signal with interpretation *"spectrum"* should be + considered to be a 2-D array of 1-D data. A 3-D signal with + interpretation *"image"* should be interpreted as a 1-D array (a list) + of 2-D images. An n-D array with interpretation *"image"* should be + interpreted as an (n-2)-D array of images. + + A warning message is logged if the returned interpretation is not one + of the allowed values, but no error is raised and the unknown + interpretation is returned anyway. + """ + allowed_interpretations = [None, "scalar", "spectrum", "image", + # "rgba-image", "hsla-image", "cmyk-image" # TODO + "vertex"] + + interpretation = _get_attr(self.signal, "interpretation", None) + if interpretation is None: + interpretation = _get_attr(self.group, "interpretation", None) + + if interpretation not in allowed_interpretations: + _logger.warning("Interpretation %s is not valid." % interpretation + + " Valid values: " + ", ".join(allowed_interpretations)) + return interpretation + + @property + def axes(self): + """List of the axes datasets. + + The list typically has as many elements as there are dimensions in the + signal dataset, the exception being scatter plots which typically + use a 1D signal and several 1D axes of the same size. + + If an axis dataset applies to several dimensions of the signal, it + will be repeated in the list. + + If a dimension of the signal has no dimension scale (i.e. there is a + "." in that position in the *@axes* array), `None` is inserted in the + output list in its position. + + .. note:: + + In theory, the *@axes* attribute defines as many entries as there + are dimensions in the signal. In such a case, there is no ambiguity. + If this is not the case, this implementation relies on the existence + of an *@interpretation* (*spectrum* or *image*) attribute in the + *signal* dataset. + + .. note:: + + If an axis dataset defines attributes @first_good or @last_good, + the output will be a numpy array resulting from slicing that + axis to keep only the good index range: axis[first_good:last_good + 1] + + :rtype: list[Dataset or 1D array or None] + """ + if self._axes is not None: + # use cache + return self._axes + ndims = len(self.signal.shape) + axes_names = _get_attr(self.group, "axes") + interpretation = self.interpretation + + if axes_names is None: + self._axes = [None for _i in range(ndims)] + return self._axes + + if isinstance(axes_names, str): + axes_names = [axes_names] + + if len(axes_names) == ndims: + # axes is a list of strings, one axis per dim is explicitly defined + axes = [None] * ndims + for i, axis_n in enumerate(axes_names): + if axis_n != ".": + axes[i] = self.group[axis_n] + elif interpretation is not None: + # case of @interpretation attribute defined: we expect 1, 2 or 3 axes + # corresponding to the 1, 2, or 3 last dimensions of the signal + assert len(axes_names) == _INTERPDIM[interpretation] + axes = [None] * (ndims - _INTERPDIM[interpretation]) + for axis_n in axes_names: + if axis_n != ".": + axes.append(self.group[axis_n]) + else: + axes.append(None) + else: # scatter + axes = [] + for axis_n in axes_names: + if axis_n != ".": + axes.append(self.group[axis_n]) + else: + axes.append(None) + # keep only good range of axis data + for i, axis in enumerate(axes): + if axis is None: + continue + if "first_good" not in axis.attrs and "last_good" not in axis.attrs: + continue + fg_idx = _get_attr(axis, "first_good") or 0 + lg_idx = _get_attr(axis, "last_good") or (len(axis) - 1) + axes[i] = axis[fg_idx:lg_idx + 1] + + self._axes = axes + return self._axes + + @property + def axes_dataset_names(self): + """ + If an axis dataset applies to several dimensions of the signal, its + name will be repeated in the list. + + If a dimension of the signal has no dimension scale (i.e. there is a + "." in that position in the *@axes* array), `None` is inserted in the + output list in its position. + """ + axes_dataset_names = _get_attr(self.group, "axes") + if axes_dataset_names is None: + axes_dataset_names = _get_attr(self.group, "axes") + + ndims = len(self.signal.shape) + if axes_dataset_names is None: + return [None] * ndims + + if isinstance(axes_dataset_names, str): + axes_dataset_names = [axes_dataset_names] + + for i, axis_name in enumerate(axes_dataset_names): + if axis_name == ".": + axes_dataset_names[i] = None + + if len(axes_dataset_names) != ndims: + if self.is_scatter and ndims == 1: + return list(axes_dataset_names) + # @axes may only define 1 or 2 axes if @interpretation=spectrum/image. + # Use the existing names for the last few dims, and prepend with Nones. + assert len(axes_dataset_names) == _INTERPDIM[self.interpretation] + all_dimensions_names = [None] * (ndims - _INTERPDIM[self.interpretation]) + for axis_name in axes_dataset_names: + all_dimensions_names.append(axis_name) + return all_dimensions_names + + return list(axes_dataset_names) + + def get_axis_errors(self, axis_name): + """Return errors (uncertainties) associated with an axis. + + If the axis has attributes @first_good or @last_good, the output + is trimmed accordingly (a numpy array will be returned rather than a + dataset). + + :param str axis_name: Name of axis dataset. This dataset **must exist**. + :return: Dataset with axis errors, or None + :raise: KeyError if this group does not contain a dataset named axis_name + """ + if axis_name not in self.group: + # tolerate axis_name given as @long_name + for item in self.group: + long_name = _get_attr(self.group[item], "long_name") + if long_name is not None and long_name == axis_name: + axis_name = item + break + + if axis_name not in self.group: + raise KeyError("group does not contain a dataset named '%s'" % axis_name) + + len_axis = len(self.group[axis_name]) + + fg_idx = _get_attr(self.group[axis_name], "first_good", 0) + lg_idx = _get_attr(self.group[axis_name], "last_good", len_axis - 1) + + # case of axisname_errors dataset present + errors_name = axis_name + "_errors" + if errors_name in self.group and is_dataset(self.group[errors_name]): + if fg_idx != 0 or lg_idx != (len_axis-1): + return self.group[errors_name][fg_idx:lg_idx + 1] + else: + return self.group[errors_name] + # case of uncertainties dataset name provided in @uncertainties + uncertainties_names = _get_attr(self.group, "uncertainties") + if uncertainties_names is None: + uncertainties_names = _get_attr(self.signal, "uncertainties") + if isinstance(uncertainties_names, str): + uncertainties_names = [uncertainties_names] + if uncertainties_names is not None: + # take the uncertainty with the same index as the axis in @axes + axes_ds_names = _get_attr(self.group, "axes") + if axes_ds_names is None: + axes_ds_names = _get_attr(self.signal, "axes") + if isinstance(axes_ds_names, str): + axes_ds_names = [axes_ds_names] + elif not isinstance(axes_ds_names, list): + # transform numpy.ndarray(dtype('S21')) into list(str) + axes_ds_names = map(str, axes_ds_names) + if axis_name not in axes_ds_names: + raise KeyError("group attr @axes does not mention a dataset " + + "named '%s'" % axis_name) + errors = self.group[uncertainties_names[list(axes_ds_names).index(axis_name)]] + if fg_idx == 0 and lg_idx == (len_axis-1): + return errors # dataset + else: + return errors[fg_idx:lg_idx + 1] # numpy array + return None + + @property + def errors(self): + """Return errors (uncertainties) associated with the signal values. + + :return: Dataset with errors, or None + """ + if "errors" not in self.group: + return None + return self.group["errors"] + + @property + def is_scatter(self): + """True if the signal is 1D and all the axes have the + same size as the signal.""" + if self._is_scatter is not None: + return self._is_scatter + if not self.signal_is_1d: + self._is_scatter = False + else: + self._is_scatter = True + sigsize = 1 + for dim in self.signal.shape: + sigsize *= dim + for axis in self.axes: + if axis is None: + continue + axis_size = 1 + for dim in axis.shape: + axis_size *= dim + self._is_scatter = self._is_scatter and (axis_size == sigsize) + return self._is_scatter + + @property + def is_x_y_value_scatter(self): + """True if this is a scatter with a signal and two axes.""" + return self.is_scatter and len(self.axes) == 2 + + # we currently have no widget capable of plotting 4D data + @property + def is_unsupported_scatter(self): + """True if this is a scatter with a signal and more than 2 axes.""" + return self.is_scatter and len(self.axes) > 2 |