diff options
Diffstat (limited to 'src/silx/math/histogram.py')
-rw-r--r-- | src/silx/math/histogram.py | 593 |
1 files changed, 593 insertions, 0 deletions
diff --git a/src/silx/math/histogram.py b/src/silx/math/histogram.py new file mode 100644 index 0000000..af9ee68 --- /dev/null +++ b/src/silx/math/histogram.py @@ -0,0 +1,593 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ + + +""" +This module provides a function and a class to compute multidimensional +histograms. + + +Classes +======= + +- :class:`Histogramnd` : multi dimensional histogram. +- :class:`HistogramndLut` : optimized to compute several histograms from data sharing the same coordinates. + +Examples +======== + +Single histogram +---------------- + +Given some 3D data: + +>>> import numpy as np +>>> shape = (10**7, 3) +>>> sample = np.random.random(shape) * 500 +>>> weights = np.random.random((shape[0],)) + +Computing the histogram with Histogramnd : + +>>> from silx.math import Histogramnd +>>> n_bins = 35 +>>> ranges = [[40., 150.], [-130., 250.], [0., 505]] +>>> histo, w_histo, edges = Histogramnd(sample, n_bins=n_bins, histo_range=ranges, weights=weights) + +Histogramnd can accumulate sets of data that don't have the same +coordinates : + +>>> from silx.math import Histogramnd +>>> histo_obj = Histogramnd(sample, n_bins=n_bins, histo_range=ranges, weights=weights) +>>> sample_2 = np.random.random(shape) * 200 +>>> weights_2 = np.random.random((shape[0],)) +>>> histo_obj.accumulate(sample_2, weights=weights_2) + +And then access the results: + +>>> histo = histo_obj.histo +>>> weighted_histo = histo_obj.weighted_histo + +or even: + +>>> histo, w_histo, edges = histo_obj + +Accumulating histograms (LUT) +----------------------------- +In some situations we need to compute the weighted histogram of several +sets of data (weights) that have the same coordinates (sample). + +Again, some data (2 sets of weights) : + +>>> import numpy as np +>>> shape = (10**7, 3) +>>> sample = np.random.random(shape) * 500 +>>> weights_1 = np.random.random((shape[0],)) +>>> weights_2 = np.random.random((shape[0],)) + +And getting the result with HistogramLut : + +>>> from silx.math import HistogramndLut + +>>> n_bins = 35 +>>> ranges = [[40., 150.], [-130., 250.], [0., 505]] + +>>> histo_lut = HistogramndLut(sample, ranges, n_bins) + +First call, with weight_1 : + +>>> histo_lut.accumulate(weights_1) + +Second call, with weight_2 : + +>>> histo_lut.accumulate(weights_2) + +Retrieving the results (this is a copy of what's actually stored in +this instance) : + +>>> histo = histo_lut.histo +>>> w_histo = histo_lut.weighted_histo + +Note that the following code gives the same result, but the +HistogramndLut instance does not store the accumulated weighted histogram. + +First call with weights_1 + +>>> histo, w_histo = histo_lut.apply_lut(weights_1) + +Second call with weights_2 + +>>> histo, w_histo = histo_lut.apply_lut(weights_2, histo=histo, weighted_histo=w_histo) + +Bin edges +--------- +When computing an histogram the caller is asked to provide the histogram +range along each coordinates (parameter *histo_range*). This parameter must +be given a [N, 2] array where N is the number of dimensions of the histogram. + +In other words, the caller must provide, for each dimension, +the left edge of the first (*leftmost*) bin, and the right edge of the +last (*rightmost*) bin. + +E.g. : for a 1D sample, for a histo_range equal to [0, 10] and n_bins=4, the +bins ranges will be : + +* [0, 2.5[, [2.5, 5[, [5, 7.5[, [7.5, 10 **[** if last_bin_closed = **False** +* [0, 2.5[, [2.5, 5[, [5, 7.5[, [7.5, 10 **]** if last_bin_closed = **True** + +.... +""" + +__authors__ = ["D. Naudet"] +__license__ = "MIT" +__date__ = "02/10/2017" + +import numpy as np +from .chistogramnd import chistogramnd as _chistogramnd # noqa +from .chistogramnd_lut import histogramnd_get_lut as _histo_get_lut +from .chistogramnd_lut import histogramnd_from_lut as _histo_from_lut + + +class Histogramnd(object): + """ + Computes the multidimensional histogram of some data. + """ + + def __init__(self, + sample, + histo_range, + n_bins, + weights=None, + weight_min=None, + weight_max=None, + last_bin_closed=False, + wh_dtype=None): + """ + :param sample: + The data to be histogrammed. + Its shape must be either + (N,) if it contains one dimensional coordinates, + or an (N,D) array where the rows are the + coordinates of points in a D dimensional space. + The following dtypes are supported : :class:`numpy.float64`, + :class:`numpy.float32`, :class:`numpy.int32`. + + .. warning:: if sample is not a C_CONTIGUOUS ndarray (e.g : a non + contiguous slice) then histogramnd will have to do make an internal + copy. + :type sample: :class:`numpy.array` + + :param histo_range: + A (N, 2) array containing the histogram range along each dimension, + where N is the sample's number of dimensions. + :type histo_range: array_like + + :param n_bins: + The number of bins : + * a scalar (same number of bins for all dimensions) + * a D elements array (number of bins for each dimensions) + :type n_bins: scalar or array_like + + :param weights: + A N elements numpy array of values associated with + each sample. + The values of the *weighted_histo* array + returned by the function are equal to the sum of + the weights associated with the samples falling + into each bin. + The following dtypes are supported : :class:`numpy.float64`, + :class:`numpy.float32`, :class:`numpy.int32`. + + .. note:: If None, the weighted histogram returned will be None. + :type weights: *optional*, :class:`numpy.array` + + :param weight_min: + Use this parameter to filter out all samples whose + weights are lower than this value. + + .. note:: This value will be cast to the same type + as *weights*. + :type weight_min: *optional*, scalar + + :param weight_max: + Use this parameter to filter out all samples whose + weights are higher than this value. + + .. note:: This value will be cast to the same type + as *weights*. + + :type weight_max: *optional*, scalar + + :param last_bin_closed: + By default the last bin is half + open (i.e.: [x,y) ; x included, y + excluded), like all the other bins. + Set this parameter to true if you want + the LAST bin to be closed. + :type last_bin_closed: *optional*, :class:`python.boolean` + + :param wh_dtype: type of the weighted histogram array. + If not provided, the weighted histogram array will contain values + of type numpy.double. Allowed values are : `numpy.double` and + `numpy.float32` + :type wh_dtype: *optional*, numpy data type + """ + + self.__histo_range = histo_range + self.__n_bins = n_bins + self.__last_bin_closed = last_bin_closed + self.__wh_dtype = wh_dtype + + if sample is None: + self.__data = [None, None, None] + else: + self.__data = _chistogramnd(sample, + self.__histo_range, + self.__n_bins, + weights=weights, + weight_min=weight_min, + weight_max=weight_max, + last_bin_closed=self.__last_bin_closed, + wh_dtype=self.__wh_dtype) + + def __getitem__(self, key): + """ + If necessary, results can be unpacked from an instance of Histogramnd : + *histogram*, *weighted histogram*, *bins edge*. + + Example : + + .. code-block:: python + + histo, w_histo, edges = Histogramnd(sample, histo_range, n_bins, weights) + + """ + return self.__data[key] + + def accumulate(self, + sample, + weights=None, + weight_min=None, + weight_max=None): + """ + Computes the multidimensional histogram of some data and accumulates it + into the histogram held by this instance of Histogramnd. + + :param sample: + The data to be histogrammed. + Its shape must be either + (N,) if it contains one dimensional coordinates, + or an (N,D) array where the rows are the + coordinates of points in a D dimensional space. + The following dtypes are supported : :class:`numpy.float64`, + :class:`numpy.float32`, :class:`numpy.int32`. + + .. warning:: if sample is not a C_CONTIGUOUS ndarray (e.g : a non + contiguous slice) then histogramnd will have to do make an internal + copy. + :type sample: :class:`numpy.array` + + :param weights: + A N elements numpy array of values associated with + each sample. + The values of the *weighted_histo* array + returned by the function are equal to the sum of + the weights associated with the samples falling + into each bin. + The following dtypes are supported : :class:`numpy.float64`, + :class:`numpy.float32`, :class:`numpy.int32`. + + .. note:: If None, the weighted histogram returned will be None. + :type weights: *optional*, :class:`numpy.array` + + :param weight_min: + Use this parameter to filter out all samples whose + weights are lower than this value. + + .. note:: This value will be cast to the same type + as *weights*. + :type weight_min: *optional*, scalar + + :param weight_max: + Use this parameter to filter out all samples whose + weights are higher than this value. + + .. note:: This value will be cast to the same type + as *weights*. + :type weight_max: *optional*, scalar + """ + result = _chistogramnd(sample, + self.__histo_range, + self.__n_bins, + weights=weights, + weight_min=weight_min, + weight_max=weight_max, + last_bin_closed=self.__last_bin_closed, + histo=self.__data[0], + weighted_histo=self.__data[1], + wh_dtype=self.__wh_dtype) + if self.__data[0] is None: + self.__data = result + elif self.__data[1] is None and result[1] is not None: + self.__data = result + + histo = property(lambda self: self[0]) + """ Histogram array, or None if this instance was initialized without + <sample> and accumulate has not been called yet. + + .. note:: this is a **reference** to the array store in this + Histogramnd instance, use with caution. + """ + weighted_histo = property(lambda self: self[1]) + """ Weighted Histogram, or None if this instance was initialized without + <sample>, or no weights have been passed to __init__ nor accumulate. + + .. note:: this is a **reference** to the array store in this + Histogramnd instance, use with caution. + """ + edges = property(lambda self: self[2]) + """ Bins edges, or None if this instance was initialized without + <sample> and accumulate has not been called yet. + """ + + +class HistogramndLut(object): + """ + The HistogramndLut class allows you to bin data onto a regular grid. + The use of HistogramndLut is interesting when several sets of data that + share the same coordinates (*sample*) have to be mapped onto the same grid. + """ + + def __init__(self, + sample, + histo_range, + n_bins, + last_bin_closed=False, + dtype=None): + """ + :param sample: + The coordinates of the data to be histogrammed. + Its shape must be either (N,) if it contains one dimensional + coordinates, or an (N, D) array where the rows are the + coordinates of points in a D dimensional space. + The following dtypes are supported : :class:`numpy.float64`, + :class:`numpy.float32`, :class:`numpy.int32`. + :type sample: :class:`numpy.array` + + :param histo_range: + A (N, 2) array containing the histogram range along each dimension, + where N is the sample's number of dimensions. + :type histo_range: array_like + + :param n_bins: + The number of bins : + * a scalar (same number of bins for all dimensions) + * a D elements array (number of bins for each dimensions) + :type n_bins: scalar or array_like + + :param dtype: data type of the weighted histogram. If None, the data type + will be the same as the first weights array provided (on first call of + the instance). + :type dtype: `numpy.dtype` + + :param last_bin_closed: + By default the last bin is half + open (i.e.: [x,y) ; x included, y + excluded), like all the other bins. + Set this parameter to true if you want + the LAST bin to be closed. + :type last_bin_closed: *optional*, :class:`python.boolean` + """ + lut, histo, edges = _histo_get_lut(sample, + histo_range, + n_bins, + last_bin_closed=last_bin_closed) + + self.__n_bins = np.array(histo.shape) + self.__histo_range = histo_range + self.__lut = lut + self.__histo = None + self.__weighted_histo = None + self.__edges = edges + self.__dtype = dtype + self.__shape = histo.shape + self.__last_bin_closed = last_bin_closed + self.clear() + + def clear(self): + """ + Resets the instance (zeroes the histograms). + """ + self.__weighted_histo = None + self.__histo = None + + @property + def lut(self): + """ + Copy of the Lut + """ + return self.__lut.copy() + + def histo(self, copy=True): + """ + Histogram (a copy of it), or None if `~accumulate` has not been called yet + (or clear was just called). + If *copy* is set to False then the actual reference to the array is + returned *(use with caution)*. + """ + if copy and self.__histo is not None: + return self.__histo.copy() + return self.__histo + + def weighted_histo(self, copy=True): + """ + Weighted histogram (a copy of it), or None if `~accumulate` has not been called yet + (or clear was just called). If *copy* is set to False then the actual + reference to the array is returned *(use with caution)*. + """ + if copy and self.__weighted_histo is not None: + return self.__weighted_histo.copy() + return self.__weighted_histo + + @property + def histo_range(self): + """ + Bins ranges. + """ + return self.__histo_range.copy() + + @property + def n_bins(self): + """ + Number of bins in each direction. + """ + return self.__n_bins.copy() + + @property + def bins_edges(self): + """ + Bins edges of the histograms, one array for each dimensions. + """ + return tuple([edges[:] for edges in self.__edges]) + + @property + def last_bin_closed(self): + """ + Returns True if the rightmost bin in each dimension is close (i.e : + values equal to the rightmost bin edge is included in the bin). + """ + return self.__last_bin_closed + + def accumulate(self, + weights, + weight_min=None, + weight_max=None): + """ + Computes the multidimensional histogram of some data and adds it to + the current histogram stored by this instance. The results can be + retrieved with the :attr:`~.histo` and :attr:`~.weighted_histo` + properties. + + :param weights: + A numpy array of values associated with each sample. The number of + elements in the array must be the same as the number of samples + provided at instantiation time. + :type histo_range: array_like + + :param weight_min: + Use this parameter to filter out all samples whose + weights are lower than this value. + + .. note:: This value will be cast to the same type + as *weights*. + :type weight_min: *optional*, scalar + + :param weight_max: + Use this parameter to filter out all samples whose + weights are higher than this value. + + .. note:: This value will be cast to the same type + as *weights*. + + :type weight_max: *optional*, scalar + """ + if self.__dtype is None: + self.__dtype = weights.dtype + + histo, w_histo = _histo_from_lut(weights, + self.__lut, + histo=self.__histo, + weighted_histo=self.__weighted_histo, + shape=self.__shape, + dtype=self.__dtype, + weight_min=weight_min, + weight_max=weight_max) + + if self.__histo is None: + self.__histo = histo + + if self.__weighted_histo is None: + self.__weighted_histo = w_histo + + def apply_lut(self, + weights, + histo=None, + weighted_histo=None, + weight_min=None, + weight_max=None): + """ + Computes the multidimensional histogram of some data and returns the + result (it is NOT added to the current histogram stored by this + instance). + + :param weights: + A numpy array of values associated with each sample. The number of + elements in the array must be the same as the number of samples + provided at instantiation time. + :type histo_range: array_like + + :param histo: + Use this parameter if you want to pass your + own histogram array instead of the one + created by this function. New values + will be added to this array. The returned array + will then be this one. + :type histo: *optional*, :class:`numpy.array` + + :param weighted_histo: + Use this parameter if you want to pass your + own weighted histogram array instead of + the created by this function. New + values will be added to this array. The returned array + will then be this one (same reference). + :type weighted_histo: *optional*, :class:`numpy.array` + + :param weight_min: + Use this parameter to filter out all samples whose + weights are lower than this value. + + .. note:: This value will be cast to the same type + as *weights*. + :type weight_min: *optional*, scalar + + :param weight_max: + Use this parameter to filter out all samples whose + weights are higher than this value. + + .. note:: This value will be cast to the same type + as *weights*. + :type weight_max: *optional*, scalar + """ + histo, w_histo = _histo_from_lut(weights, + self.__lut, + histo=histo, + weighted_histo=weighted_histo, + shape=self.__shape, + dtype=self.__dtype, + weight_min=weight_min, + weight_max=weight_max) + self.__dtype = w_histo.dtype + return histo, w_histo + +if __name__ == '__main__': + pass |