diff options
Diffstat (limited to 'silx/utils/array_like.py')
-rw-r--r-- | silx/utils/array_like.py | 593 |
1 files changed, 593 insertions, 0 deletions
diff --git a/silx/utils/array_like.py b/silx/utils/array_like.py new file mode 100644 index 0000000..f4c85bf --- /dev/null +++ b/silx/utils/array_like.py @@ -0,0 +1,593 @@ +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Functions and classes for array-like objects, implementing common numpy +array features for datasets or nested sequences, while trying to avoid copying +data. + +Classes: + + - :class:`DatasetView`: Similar to a numpy view, to access + a h5py dataset as if it was transposed, without casting it into a + numpy array (this lets h5py handle reading the data from the + file into memory, as needed). + - :class:`ListOfImages`: Similar to a numpy view, to access + a list of 2D numpy arrays as if it was a 3D array (possibly transposed), + without casting it into a numpy array. + +Functions: + + - :func:`is_array` + - :func:`is_list_of_arrays` + - :func:`is_nested_sequence` + - :func:`get_shape` + - :func:`get_dtype` + - :func:`get_concatenated_dtype` + +""" + +from __future__ import absolute_import, print_function, division +import numpy +import sys +from silx.third_party import six + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "26/04/2017" + + +def is_array(obj): + """Return True if object implements necessary attributes to be + considered similar to a numpy array. + + Attributes needed are "shape", "dtype", "__getitem__" + and "__array__". + + :param obj: Array-like object (numpy array, h5py dataset...) + :return: boolean + """ + # add more required attribute if necessary + for attr in ("shape", "dtype", "__array__", "__getitem__"): + if not hasattr(obj, attr): + return False + return True + + +def is_list_of_arrays(obj): + """Return True if object is a sequence of numpy arrays, + e.g. a list of images as 2D arrays. + + :param obj: list of arrays + :return: boolean""" + # object must not be a numpy array + if is_array(obj): + return False + + # object must have a __len__ method + if not hasattr(obj, "__len__"): + return False + + # all elements in sequence must be arrays + for arr in obj: + if not is_array(arr): + return False + + return True + + +def is_nested_sequence(obj): + """Return True if object is a nested sequence. + + A simple 1D sequence is considered to be a nested sequence. + + Numpy arrays and h5py datasets are not considered to be nested sequences. + + To test if an object is a nested sequence in a more general sense, + including arrays and datasets, use:: + + is_nested_sequence(obj) or is_array(obj) + + :param obj: nested sequence (numpy array, h5py dataset...) + :return: boolean""" + # object must not be a numpy array + if is_array(obj): + return False + + if not hasattr(obj, "__len__"): + return False + + # obj must not be a list of (lists of) numpy arrays + subsequence = obj + while hasattr(subsequence, "__len__"): + if is_array(subsequence): + return False + # strings cause infinite loops + if isinstance(subsequence, six.string_types + (six.binary_type, )): + return True + subsequence = subsequence[0] + + # object has __len__ and is not an array + return True + + +def get_shape(array_like): + """Return shape of an array like object. + + In case the object is a nested sequence but not an array or dataset + (list of lists, tuples...), the size of each dimension is assumed to be + uniform, and is deduced from the length of the first sequence. + + :param array_like: Array like object: numpy array, hdf5 dataset, + multi-dimensional sequence + :return: Shape of array, as a tuple of integers + """ + if hasattr(array_like, "shape"): + return array_like.shape + + shape = [] + subsequence = array_like + while hasattr(subsequence, "__len__"): + shape.append(len(subsequence)) + # strings cause infinite loops + if isinstance(subsequence, six.string_types + (six.binary_type, )): + break + subsequence = subsequence[0] + + return tuple(shape) + + +def get_dtype(array_like): + """Return dtype of an array like object. + + In the case of a nested sequence, the type of the first value + is inspected. + + :param array_like: Array like object: numpy array, hdf5 dataset, + multi-dimensional nested sequence + :return: numpy dtype of object + """ + if hasattr(array_like, "dtype"): + return array_like.dtype + + subsequence = array_like + while hasattr(subsequence, "__len__"): + # strings cause infinite loops + if isinstance(subsequence, six.string_types + (six.binary_type, )): + break + subsequence = subsequence[0] + + return numpy.dtype(type(subsequence)) + + +def get_concatenated_dtype(arrays): + """Return dtype of array resulting of concatenation + of a list of arrays (without actually concatenating + them). + + :param arrays: list of numpy arrays + :return: resulting dtype after concatenating arrays + """ + dtypes = {a.dtype for a in arrays} + dummy = [] + for dt in dtypes: + dummy.append(numpy.zeros((1, 1), dtype=dt)) + return numpy.array(dummy).dtype + + +class ListOfImages(object): + """This class provides a way to access values and slices in a stack of + images stored as a list of 2D numpy arrays, without creating a 3D numpy + array first. + + A transposition can be specified, as a 3-tuple of dimensions in the wanted + order. For example, to transpose from ``xyz`` ``(0, 1, 2)`` into ``yzx``, + the transposition tuple is ``(1, 2, 0)`` + + All the 2D arrays in the list must have the same shape. + + The global dtype of the stack of images is the one that would be obtained + by casting the list of 2D arrays into a 3D numpy array. + + :param images: list of 2D numpy arrays, or :class:`ListOfImages` object + :param transposition: Tuple of dimension numbers in the wanted order + """ + def __init__(self, images, transposition=None): + """ + + """ + super(ListOfImages, self).__init__() + + # if images is a ListOfImages instance, get the underlying data + # as a list of 2D arrays + if isinstance(images, ListOfImages): + images = images.images + + # test stack of images is as expected + assert is_list_of_arrays(images), \ + "Image stack must be a list of arrays" + image0_shape = images[0].shape + for image in images: + assert image.ndim == 2, \ + "Images must be 2D numpy arrays" + assert image.shape == image0_shape, \ + "All images must have the same shape" + + self.images = images + """List of images""" + + self.shape = (len(images), ) + image0_shape + """Tuple of array dimensions""" + self.dtype = get_concatenated_dtype(images) + """Data-type of the global array""" + self.ndim = 3 + """Number of array dimensions""" + + self.size = len(images) * image0_shape[0] * image0_shape[1] + """Number of elements in the array.""" + + self.transposition = list(range(self.ndim)) + """List of dimension indices, in an order depending on the + specified transposition. By default this is simply + [0, ..., self.ndim], but it can be changed by specifying a different + ``transposition`` parameter at initialization. + + Use :meth:`transpose`, to create a new :class:`ListOfImages` + with a different :attr:`transposition`. + """ + + if transposition is not None: + assert len(transposition) == self.ndim + assert set(transposition) == set(list(range(self.ndim))), \ + "Transposition must be a sequence containing all dimensions" + self.transposition = transposition + self.__sort_shape() + + def __sort_shape(self): + """Sort shape in the order defined in :attr:`transposition` + """ + new_shape = tuple(self.shape[dim] for dim in self.transposition) + self.shape = new_shape + + def __sort_indices(self, indices): + """Return array indices sorted in the order needed + to access data in the original non-transposed images. + + :param indices: Tuple of ndim indices, in the order needed + to access the transposed view + :return: Sorted tuple of indices, to access original data + """ + assert len(indices) == self.ndim + sorted_indices = tuple(idx for (_, idx) in + sorted(zip(self.transposition, indices))) + return sorted_indices + + def __array__(self, dtype=None): + """Cast the images into a numpy array, and return it. + + If a transposition has been done on this images, return + a transposed view of a numpy array.""" + return numpy.transpose(numpy.array(self.images, dtype=dtype), + self.transposition) + + def __len__(self): + return self.shape[0] + + def transpose(self, transposition=None): + """Return a re-ordered (dimensions permutated) + :class:`ListOfImages`. + + The returned object refers to + the same images but with a different :attr:`transposition`. + + :param list[int] transposition: List/tuple of dimension numbers in the + wanted order. + If ``None`` (default), reverse the dimensions. + :return: new :class:`ListOfImages` object + """ + # by default, reverse the dimensions + if transposition is None: + transposition = list(reversed(self.transposition)) + + # If this ListOfImages is already transposed, sort new transposition + # relative to old transposition + elif list(self.transposition) != list(range(self.ndim)): + transposition = [self.transposition[i] for i in transposition] + + return ListOfImages(self.images, + transposition) + + @property + def T(self): + """ + Same as self.transpose() + + :return: DatasetView with dimensions reversed.""" + return self.transpose() + + def __getitem__(self, item): + """Handle a subset of numpy indexing with regards to the dimension + order as specified in :attr:`transposition` + + Following features are **not supported**: + + - fancy indexing using numpy arrays + - using ellipsis objects + + :param item: Index + :return: value or slice as a numpy array + """ + # 1-D slicing -> n-D slicing (n=1) + if not hasattr(item, "__len__"): + # first dimension index is given + item = [item] + # following dimensions are indexed with : (all elements) + item += [slice(None) for _i in range(self.ndim - 1)] + + # n-dimensional slicing + if len(item) != self.ndim: + raise IndexError( + "N-dim slicing requires a tuple of N indices/slices. " + + "Needed dimensions: %d" % self.ndim) + + # get list of indices sorted in the original images order + sorted_indices = self.__sort_indices(item) + list_idx, array_idx = sorted_indices[0], sorted_indices[1:] + + images_selection = self.images[list_idx] + + # now we must transpose the output data + output_dimensions = [] + frozen_dimensions = [] + for i, idx in enumerate(item): + # slices and sequences + if not isinstance(idx, int): + output_dimensions.append(self.transposition[i]) + # regular integer index + else: + # whenever a dimension is fixed (indexed by an integer) + # the number of output dimension is reduced + frozen_dimensions.append(self.transposition[i]) + + # decrement output dimensions that are above frozen dimensions + for frozen_dim in reversed(sorted(frozen_dimensions)): + for i, out_dim in enumerate(output_dimensions): + if out_dim > frozen_dim: + output_dimensions[i] -= 1 + + assert (len(output_dimensions) + len(frozen_dimensions)) == self.ndim + assert set(output_dimensions) == set(range(len(output_dimensions))) + + # single list elements selected + if isinstance(images_selection, numpy.ndarray): + return numpy.transpose(images_selection[array_idx], + axes=output_dimensions) + # muliple list elements selected + else: + # apply selection first + output_stack = [] + for img in images_selection: + output_stack.append(img[array_idx]) + # then cast into a numpy array, and transpose + return numpy.transpose(numpy.array(output_stack), + axes=output_dimensions) + + def min(self): + """ + :return: Global minimum value + """ + min_value = self.images[0].min() + if len(self.images) > 1: + for img in self.images[1:]: + min_value = min(min_value, img.min()) + return min_value + + def max(self): + """ + :return: Global maximum value + """ + max_value = self.images[0].max() + if len(self.images) > 1: + for img in self.images[1:]: + max_value = max(max_value, img.max()) + return max_value + + +class DatasetView(object): + """This class provides a way to transpose a dataset without + casting it into a numpy array. This way, the dataset in a file need not + necessarily be integrally read into memory to view it in a different + transposition. + + .. note:: + The performances depend a lot on the way the dataset was written + to file. Depending on the chunking strategy, reading a complete 2D slice + in an unfavorable direction may still require the entire dataset to + be read from disk. + + :param dataset: h5py dataset + :param transposition: List of dimensions sorted in the order of + transposition (relative to the original h5py dataset) + """ + def __init__(self, dataset, transposition=None): + """ + + """ + super(DatasetView, self).__init__() + self.dataset = dataset + """original dataset""" + + self.shape = dataset.shape + """Tuple of array dimensions""" + self.dtype = dataset.dtype + """Data-type of the array’s element""" + self.ndim = len(dataset.shape) + """Number of array dimensions""" + + size = 0 + if self.ndim: + size = 1 + for dimsize in self.shape: + size *= dimsize + self.size = size + """Number of elements in the array.""" + + self.transposition = list(range(self.ndim)) + """List of dimension indices, in an order depending on the + specified transposition. By default this is simply + [0, ..., self.ndim], but it can be changed by specifying a different + `transposition` parameter at initialization. + + Use :meth:`transpose`, to create a new :class:`DatasetView` + with a different :attr:`transposition`. + """ + + if transposition is not None: + assert len(transposition) == self.ndim + assert set(transposition) == set(list(range(self.ndim))), \ + "Transposition must be a list containing all dimensions" + self.transposition = transposition + self.__sort_shape() + + def __sort_shape(self): + """Sort shape in the order defined in :attr:`transposition` + """ + new_shape = tuple(self.shape[dim] for dim in self.transposition) + self.shape = new_shape + + def __sort_indices(self, indices): + """Return array indices sorted in the order needed + to access data in the original non-transposed dataset. + + :param indices: Tuple of ndim indices, in the order needed + to access the view + :return: Sorted tuple of indices, to access original data + """ + assert len(indices) == self.ndim + sorted_indices = tuple(idx for (_, idx) in + sorted(zip(self.transposition, indices))) + return sorted_indices + + def __getitem__(self, item): + """Handle fancy indexing with regards to the dimension order as + specified in :attr:`transposition` + + The supported fancy-indexing syntax is explained at + http://docs.h5py.org/en/latest/high/dataset.html#fancy-indexing. + + Additional restrictions exist if the data has been transposed: + + - numpy boolean array indexing is not supported + - ellipsis objects are not supported + + :param item: Index, possibly fancy index (must be supported by h5py) + :return: Sliced numpy array or numpy scalar + """ + # no transposition, let the original dataset handle indexing + if self.transposition == list(range(self.ndim)): + return self.dataset[item] + + # 1-D slicing: create a list of indices to switch to n-D slicing + if not hasattr(item, "__len__"): + # first dimension index (list index) is given + item = [item] + # following dimensions are indexed with slices representing all elements + item += [slice(None) for _i in range(self.ndim - 1)] + + # n-dimensional slicing + if len(item) != self.ndim: + raise IndexError( + "N-dim slicing requires a tuple of N indices/slices. " + + "Needed dimensions: %d" % self.ndim) + + # get list of indices sorted in the original dataset order + sorted_indices = self.__sort_indices(item) + + output_data_not_transposed = self.dataset[sorted_indices] + + # now we must transpose the output data + output_dimensions = [] + frozen_dimensions = [] + for i, idx in enumerate(item): + # slices and sequences + if not isinstance(idx, int): + output_dimensions.append(self.transposition[i]) + # regular integer index + else: + # whenever a dimension is fixed (indexed by an integer) + # the number of output dimension is reduced + frozen_dimensions.append(self.transposition[i]) + + # decrement output dimensions that are above frozen dimensions + for frozen_dim in reversed(sorted(frozen_dimensions)): + for i, out_dim in enumerate(output_dimensions): + if out_dim > frozen_dim: + output_dimensions[i] -= 1 + + assert (len(output_dimensions) + len(frozen_dimensions)) == self.ndim + assert set(output_dimensions) == set(range(len(output_dimensions))) + + return numpy.transpose(output_data_not_transposed, + axes=output_dimensions) + + def __array__(self, dtype=None): + """Cast the dataset into a numpy array, and return it. + + If a transposition has been done on this dataset, return + a transposed view of a numpy array.""" + return numpy.transpose(numpy.array(self.dataset, dtype=dtype), + self.transposition) + + def __len__(self): + return self.shape[0] + + def transpose(self, transposition=None): + """Return a re-ordered (dimensions permutated) + :class:`DatasetView`. + + The returned object refers to + the same dataset but with a different :attr:`transposition`. + + :param list[int] transposition: List of dimension numbers in the wanted order. + If ``None`` (default), reverse the dimensions. + :return: Transposed DatasetView + """ + # by default, reverse the dimensions + if transposition is None: + transposition = list(reversed(self.transposition)) + + # If this DatasetView is already transposed, sort new transposition + # relative to old transposition + elif list(self.transposition) != list(range(self.ndim)): + transposition = [self.transposition[i] for i in transposition] + + return DatasetView(self.dataset, + transposition) + + @property + def T(self): + """ + Same as self.transpose() + + :return: DatasetView with dimensions reversed.""" + return self.transpose() |