summaryrefslogtreecommitdiff
path: root/silx/utils/array_like.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/utils/array_like.py')
-rw-r--r--silx/utils/array_like.py596
1 files changed, 0 insertions, 596 deletions
diff --git a/silx/utils/array_like.py b/silx/utils/array_like.py
deleted file mode 100644
index 1a2e72e..0000000
--- a/silx/utils/array_like.py
+++ /dev/null
@@ -1,596 +0,0 @@
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016-2018 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Functions and classes for array-like objects, implementing common numpy
-array features for datasets or nested sequences, while trying to avoid copying
-data.
-
-Classes:
-
- - :class:`DatasetView`: Similar to a numpy view, to access
- a h5py dataset as if it was transposed, without casting it into a
- numpy array (this lets h5py handle reading the data from the
- file into memory, as needed).
- - :class:`ListOfImages`: Similar to a numpy view, to access
- a list of 2D numpy arrays as if it was a 3D array (possibly transposed),
- without casting it into a numpy array.
-
-Functions:
-
- - :func:`is_array`
- - :func:`is_list_of_arrays`
- - :func:`is_nested_sequence`
- - :func:`get_shape`
- - :func:`get_dtype`
- - :func:`get_concatenated_dtype`
-
-"""
-
-from __future__ import absolute_import, print_function, division
-
-import sys
-
-import numpy
-import six
-import numbers
-
-__authors__ = ["P. Knobel"]
-__license__ = "MIT"
-__date__ = "26/04/2017"
-
-
-def is_array(obj):
- """Return True if object implements necessary attributes to be
- considered similar to a numpy array.
-
- Attributes needed are "shape", "dtype", "__getitem__"
- and "__array__".
-
- :param obj: Array-like object (numpy array, h5py dataset...)
- :return: boolean
- """
- # add more required attribute if necessary
- for attr in ("shape", "dtype", "__array__", "__getitem__"):
- if not hasattr(obj, attr):
- return False
- return True
-
-
-def is_list_of_arrays(obj):
- """Return True if object is a sequence of numpy arrays,
- e.g. a list of images as 2D arrays.
-
- :param obj: list of arrays
- :return: boolean"""
- # object must not be a numpy array
- if is_array(obj):
- return False
-
- # object must have a __len__ method
- if not hasattr(obj, "__len__"):
- return False
-
- # all elements in sequence must be arrays
- for arr in obj:
- if not is_array(arr):
- return False
-
- return True
-
-
-def is_nested_sequence(obj):
- """Return True if object is a nested sequence.
-
- A simple 1D sequence is considered to be a nested sequence.
-
- Numpy arrays and h5py datasets are not considered to be nested sequences.
-
- To test if an object is a nested sequence in a more general sense,
- including arrays and datasets, use::
-
- is_nested_sequence(obj) or is_array(obj)
-
- :param obj: nested sequence (numpy array, h5py dataset...)
- :return: boolean"""
- # object must not be a numpy array
- if is_array(obj):
- return False
-
- if not hasattr(obj, "__len__"):
- return False
-
- # obj must not be a list of (lists of) numpy arrays
- subsequence = obj
- while hasattr(subsequence, "__len__"):
- if is_array(subsequence):
- return False
- # strings cause infinite loops
- if isinstance(subsequence, six.string_types + (six.binary_type, )):
- return True
- subsequence = subsequence[0]
-
- # object has __len__ and is not an array
- return True
-
-
-def get_shape(array_like):
- """Return shape of an array like object.
-
- In case the object is a nested sequence but not an array or dataset
- (list of lists, tuples...), the size of each dimension is assumed to be
- uniform, and is deduced from the length of the first sequence.
-
- :param array_like: Array like object: numpy array, hdf5 dataset,
- multi-dimensional sequence
- :return: Shape of array, as a tuple of integers
- """
- if hasattr(array_like, "shape"):
- return array_like.shape
-
- shape = []
- subsequence = array_like
- while hasattr(subsequence, "__len__"):
- shape.append(len(subsequence))
- # strings cause infinite loops
- if isinstance(subsequence, six.string_types + (six.binary_type, )):
- break
- subsequence = subsequence[0]
-
- return tuple(shape)
-
-
-def get_dtype(array_like):
- """Return dtype of an array like object.
-
- In the case of a nested sequence, the type of the first value
- is inspected.
-
- :param array_like: Array like object: numpy array, hdf5 dataset,
- multi-dimensional nested sequence
- :return: numpy dtype of object
- """
- if hasattr(array_like, "dtype"):
- return array_like.dtype
-
- subsequence = array_like
- while hasattr(subsequence, "__len__"):
- # strings cause infinite loops
- if isinstance(subsequence, six.string_types + (six.binary_type, )):
- break
- subsequence = subsequence[0]
-
- return numpy.dtype(type(subsequence))
-
-
-def get_concatenated_dtype(arrays):
- """Return dtype of array resulting of concatenation
- of a list of arrays (without actually concatenating
- them).
-
- :param arrays: list of numpy arrays
- :return: resulting dtype after concatenating arrays
- """
- dtypes = {a.dtype for a in arrays}
- dummy = []
- for dt in dtypes:
- dummy.append(numpy.zeros((1, 1), dtype=dt))
- return numpy.array(dummy).dtype
-
-
-class ListOfImages(object):
- """This class provides a way to access values and slices in a stack of
- images stored as a list of 2D numpy arrays, without creating a 3D numpy
- array first.
-
- A transposition can be specified, as a 3-tuple of dimensions in the wanted
- order. For example, to transpose from ``xyz`` ``(0, 1, 2)`` into ``yzx``,
- the transposition tuple is ``(1, 2, 0)``
-
- All the 2D arrays in the list must have the same shape.
-
- The global dtype of the stack of images is the one that would be obtained
- by casting the list of 2D arrays into a 3D numpy array.
-
- :param images: list of 2D numpy arrays, or :class:`ListOfImages` object
- :param transposition: Tuple of dimension numbers in the wanted order
- """
- def __init__(self, images, transposition=None):
- """
-
- """
- super(ListOfImages, self).__init__()
-
- # if images is a ListOfImages instance, get the underlying data
- # as a list of 2D arrays
- if isinstance(images, ListOfImages):
- images = images.images
-
- # test stack of images is as expected
- assert is_list_of_arrays(images), \
- "Image stack must be a list of arrays"
- image0_shape = images[0].shape
- for image in images:
- assert image.ndim == 2, \
- "Images must be 2D numpy arrays"
- assert image.shape == image0_shape, \
- "All images must have the same shape"
-
- self.images = images
- """List of images"""
-
- self.shape = (len(images), ) + image0_shape
- """Tuple of array dimensions"""
- self.dtype = get_concatenated_dtype(images)
- """Data-type of the global array"""
- self.ndim = 3
- """Number of array dimensions"""
-
- self.size = len(images) * image0_shape[0] * image0_shape[1]
- """Number of elements in the array."""
-
- self.transposition = list(range(self.ndim))
- """List of dimension indices, in an order depending on the
- specified transposition. By default this is simply
- [0, ..., self.ndim], but it can be changed by specifying a different
- ``transposition`` parameter at initialization.
-
- Use :meth:`transpose`, to create a new :class:`ListOfImages`
- with a different :attr:`transposition`.
- """
-
- if transposition is not None:
- assert len(transposition) == self.ndim
- assert set(transposition) == set(list(range(self.ndim))), \
- "Transposition must be a sequence containing all dimensions"
- self.transposition = transposition
- self.__sort_shape()
-
- def __sort_shape(self):
- """Sort shape in the order defined in :attr:`transposition`
- """
- new_shape = tuple(self.shape[dim] for dim in self.transposition)
- self.shape = new_shape
-
- def __sort_indices(self, indices):
- """Return array indices sorted in the order needed
- to access data in the original non-transposed images.
-
- :param indices: Tuple of ndim indices, in the order needed
- to access the transposed view
- :return: Sorted tuple of indices, to access original data
- """
- assert len(indices) == self.ndim
- sorted_indices = tuple(idx for (_, idx) in
- sorted(zip(self.transposition, indices)))
- return sorted_indices
-
- def __array__(self, dtype=None):
- """Cast the images into a numpy array, and return it.
-
- If a transposition has been done on this images, return
- a transposed view of a numpy array."""
- return numpy.transpose(numpy.array(self.images, dtype=dtype),
- self.transposition)
-
- def __len__(self):
- return self.shape[0]
-
- def transpose(self, transposition=None):
- """Return a re-ordered (dimensions permutated)
- :class:`ListOfImages`.
-
- The returned object refers to
- the same images but with a different :attr:`transposition`.
-
- :param List[int] transposition: List/tuple of dimension numbers in the
- wanted order.
- If ``None`` (default), reverse the dimensions.
- :return: new :class:`ListOfImages` object
- """
- # by default, reverse the dimensions
- if transposition is None:
- transposition = list(reversed(self.transposition))
-
- # If this ListOfImages is already transposed, sort new transposition
- # relative to old transposition
- elif list(self.transposition) != list(range(self.ndim)):
- transposition = [self.transposition[i] for i in transposition]
-
- return ListOfImages(self.images,
- transposition)
-
- @property
- def T(self):
- """
- Same as self.transpose()
-
- :return: DatasetView with dimensions reversed."""
- return self.transpose()
-
- def __getitem__(self, item):
- """Handle a subset of numpy indexing with regards to the dimension
- order as specified in :attr:`transposition`
-
- Following features are **not supported**:
-
- - fancy indexing using numpy arrays
- - using ellipsis objects
-
- :param item: Index
- :return: value or slice as a numpy array
- """
- # 1-D slicing -> n-D slicing (n=1)
- if not hasattr(item, "__len__"):
- # first dimension index is given
- item = [item]
- # following dimensions are indexed with : (all elements)
- item += [slice(None) for _i in range(self.ndim - 1)]
-
- # n-dimensional slicing
- if len(item) != self.ndim:
- raise IndexError(
- "N-dim slicing requires a tuple of N indices/slices. " +
- "Needed dimensions: %d" % self.ndim)
-
- # get list of indices sorted in the original images order
- sorted_indices = self.__sort_indices(item)
- list_idx, array_idx = sorted_indices[0], sorted_indices[1:]
-
- images_selection = self.images[list_idx]
-
- # now we must transpose the output data
- output_dimensions = []
- frozen_dimensions = []
- for i, idx in enumerate(item):
- # slices and sequences
- if not isinstance(idx, numbers.Integral):
- output_dimensions.append(self.transposition[i])
- # regular integer index
- else:
- # whenever a dimension is fixed (indexed by an integer)
- # the number of output dimension is reduced
- frozen_dimensions.append(self.transposition[i])
-
- # decrement output dimensions that are above frozen dimensions
- for frozen_dim in reversed(sorted(frozen_dimensions)):
- for i, out_dim in enumerate(output_dimensions):
- if out_dim > frozen_dim:
- output_dimensions[i] -= 1
-
- assert (len(output_dimensions) + len(frozen_dimensions)) == self.ndim
- assert set(output_dimensions) == set(range(len(output_dimensions)))
-
- # single list elements selected
- if isinstance(images_selection, numpy.ndarray):
- return numpy.transpose(images_selection[array_idx],
- axes=output_dimensions)
- # muliple list elements selected
- else:
- # apply selection first
- output_stack = []
- for img in images_selection:
- output_stack.append(img[array_idx])
- # then cast into a numpy array, and transpose
- return numpy.transpose(numpy.array(output_stack),
- axes=output_dimensions)
-
- def min(self):
- """
- :return: Global minimum value
- """
- min_value = self.images[0].min()
- if len(self.images) > 1:
- for img in self.images[1:]:
- min_value = min(min_value, img.min())
- return min_value
-
- def max(self):
- """
- :return: Global maximum value
- """
- max_value = self.images[0].max()
- if len(self.images) > 1:
- for img in self.images[1:]:
- max_value = max(max_value, img.max())
- return max_value
-
-
-class DatasetView(object):
- """This class provides a way to transpose a dataset without
- casting it into a numpy array. This way, the dataset in a file need not
- necessarily be integrally read into memory to view it in a different
- transposition.
-
- .. note::
- The performances depend a lot on the way the dataset was written
- to file. Depending on the chunking strategy, reading a complete 2D slice
- in an unfavorable direction may still require the entire dataset to
- be read from disk.
-
- :param dataset: h5py dataset
- :param transposition: List of dimensions sorted in the order of
- transposition (relative to the original h5py dataset)
- """
- def __init__(self, dataset, transposition=None):
- """
-
- """
- super(DatasetView, self).__init__()
- self.dataset = dataset
- """original dataset"""
-
- self.shape = dataset.shape
- """Tuple of array dimensions"""
- self.dtype = dataset.dtype
- """Data-type of the array’s element"""
- self.ndim = len(dataset.shape)
- """Number of array dimensions"""
-
- size = 0
- if self.ndim:
- size = 1
- for dimsize in self.shape:
- size *= dimsize
- self.size = size
- """Number of elements in the array."""
-
- self.transposition = list(range(self.ndim))
- """List of dimension indices, in an order depending on the
- specified transposition. By default this is simply
- [0, ..., self.ndim], but it can be changed by specifying a different
- `transposition` parameter at initialization.
-
- Use :meth:`transpose`, to create a new :class:`DatasetView`
- with a different :attr:`transposition`.
- """
-
- if transposition is not None:
- assert len(transposition) == self.ndim
- assert set(transposition) == set(list(range(self.ndim))), \
- "Transposition must be a list containing all dimensions"
- self.transposition = transposition
- self.__sort_shape()
-
- def __sort_shape(self):
- """Sort shape in the order defined in :attr:`transposition`
- """
- new_shape = tuple(self.shape[dim] for dim in self.transposition)
- self.shape = new_shape
-
- def __sort_indices(self, indices):
- """Return array indices sorted in the order needed
- to access data in the original non-transposed dataset.
-
- :param indices: Tuple of ndim indices, in the order needed
- to access the view
- :return: Sorted tuple of indices, to access original data
- """
- assert len(indices) == self.ndim
- sorted_indices = tuple(idx for (_, idx) in
- sorted(zip(self.transposition, indices)))
- return sorted_indices
-
- def __getitem__(self, item):
- """Handle fancy indexing with regards to the dimension order as
- specified in :attr:`transposition`
-
- The supported fancy-indexing syntax is explained at
- http://docs.h5py.org/en/latest/high/dataset.html#fancy-indexing.
-
- Additional restrictions exist if the data has been transposed:
-
- - numpy boolean array indexing is not supported
- - ellipsis objects are not supported
-
- :param item: Index, possibly fancy index (must be supported by h5py)
- :return: Sliced numpy array or numpy scalar
- """
- # no transposition, let the original dataset handle indexing
- if self.transposition == list(range(self.ndim)):
- return self.dataset[item]
-
- # 1-D slicing: create a list of indices to switch to n-D slicing
- if not hasattr(item, "__len__"):
- # first dimension index (list index) is given
- item = [item]
- # following dimensions are indexed with slices representing all elements
- item += [slice(None) for _i in range(self.ndim - 1)]
-
- # n-dimensional slicing
- if len(item) != self.ndim:
- raise IndexError(
- "N-dim slicing requires a tuple of N indices/slices. " +
- "Needed dimensions: %d" % self.ndim)
-
- # get list of indices sorted in the original dataset order
- sorted_indices = self.__sort_indices(item)
-
- output_data_not_transposed = self.dataset[sorted_indices]
-
- # now we must transpose the output data
- output_dimensions = []
- frozen_dimensions = []
- for i, idx in enumerate(item):
- # slices and sequences
- if not isinstance(idx, int):
- output_dimensions.append(self.transposition[i])
- # regular integer index
- else:
- # whenever a dimension is fixed (indexed by an integer)
- # the number of output dimension is reduced
- frozen_dimensions.append(self.transposition[i])
-
- # decrement output dimensions that are above frozen dimensions
- for frozen_dim in reversed(sorted(frozen_dimensions)):
- for i, out_dim in enumerate(output_dimensions):
- if out_dim > frozen_dim:
- output_dimensions[i] -= 1
-
- assert (len(output_dimensions) + len(frozen_dimensions)) == self.ndim
- assert set(output_dimensions) == set(range(len(output_dimensions)))
-
- return numpy.transpose(output_data_not_transposed,
- axes=output_dimensions)
-
- def __array__(self, dtype=None):
- """Cast the dataset into a numpy array, and return it.
-
- If a transposition has been done on this dataset, return
- a transposed view of a numpy array."""
- return numpy.transpose(numpy.array(self.dataset, dtype=dtype),
- self.transposition)
-
- def __len__(self):
- return self.shape[0]
-
- def transpose(self, transposition=None):
- """Return a re-ordered (dimensions permutated)
- :class:`DatasetView`.
-
- The returned object refers to
- the same dataset but with a different :attr:`transposition`.
-
- :param List[int] transposition: List of dimension numbers in the wanted order.
- If ``None`` (default), reverse the dimensions.
- :return: Transposed DatasetView
- """
- # by default, reverse the dimensions
- if transposition is None:
- transposition = list(reversed(self.transposition))
-
- # If this DatasetView is already transposed, sort new transposition
- # relative to old transposition
- elif list(self.transposition) != list(range(self.ndim)):
- transposition = [self.transposition[i] for i in transposition]
-
- return DatasetView(self.dataset,
- transposition)
-
- @property
- def T(self):
- """
- Same as self.transpose()
-
- :return: DatasetView with dimensions reversed."""
- return self.transpose()