diff options
Diffstat (limited to 'src/silx/math/chistogramnd_lut.pyx')
-rw-r--r-- | src/silx/math/chistogramnd_lut.pyx | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/src/silx/math/chistogramnd_lut.pyx b/src/silx/math/chistogramnd_lut.pyx new file mode 100644 index 0000000..3a3f05e --- /dev/null +++ b/src/silx/math/chistogramnd_lut.pyx @@ -0,0 +1,435 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2018 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ + +__authors__ = ["D. Naudet"] +__license__ = "MIT" +__date__ = "15/05/2016" + + +cimport numpy as cnumpy # noqa +cimport cython +import numpy as np + +ctypedef fused sample_t: + cnumpy.float64_t + cnumpy.float32_t + cnumpy.int32_t + cnumpy.int64_t + +ctypedef fused cumul_t: + cnumpy.float64_t + cnumpy.float32_t + cnumpy.int32_t + cnumpy.int64_t + +ctypedef fused weights_t: + cnumpy.float64_t + cnumpy.float32_t + cnumpy.int32_t + cnumpy.int64_t + +ctypedef fused lut_t: + cnumpy.int64_t + cnumpy.int32_t + cnumpy.int16_t + + +def histogramnd_get_lut(sample, + histo_range, + n_bins, + last_bin_closed=False): + """TBD + + :param sample: + The data to be histogrammed. + Its shape must be either (N,) if it contains one dimensional + coordinates, or an (N, D) array where the rows are the + coordinates of points in a D dimensional space. + The following dtypes are supported : :class:`numpy.float64`, + :class:`numpy.float32`, :class:`numpy.int32`. + :type sample: :class:`numpy.array` + + :param histo_range: + A (N, 2) array containing the histogram range along each dimension, + where N is the sample's number of dimensions. + :type histo_range: array_like + + :param n_bins: + The number of bins : + * a scalar (same number of bins for all dimensions) + * a D elements array (number of bins for each dimensions) + :type n_bins: scalar or array_like + + :param last_bin_closed: + By default the last bin is half + open (i.e.: [x,y) ; x included, y + excluded), like all the other bins. + Set this parameter to true if you want + the LAST bin to be closed. + :type last_bin_closed: *optional*, :class:`python.boolean` + + :return: The indices for each sample and the histogram (bin counts). + :rtype: tuple : (:class:`numpy.array`, :class:`numpy.array`) + """ + + s_shape = sample.shape + + n_dims = 1 if len(s_shape) == 1 else s_shape[1] + + # just in case those arent numpy arrays + # (this allows the user to provide native python lists, + # => easier for testing) + i_histo_range = histo_range + histo_range = np.array(histo_range) + err_histo_range = False + + if n_dims == 1: + if histo_range.shape == (2,): + pass + elif histo_range.shape == (1, 2): + histo_range.reshape(-1) + else: + err_histo_range = True + elif n_dims != 1 and histo_range.shape != (n_dims, 2): + err_histo_range = True + + if err_histo_range: + raise ValueError('<histo_range> error : expected {n_dims} sets of ' + 'lower and upper bin edges, ' + 'got the following instead : {histo_range}. ' + '(provided <sample> contains ' + '{n_dims}D values)' + ''.format(histo_range=i_histo_range, + n_dims=n_dims)) + + histo_range = np.double(histo_range) + + # checking n_bins size + n_bins = np.array(n_bins, ndmin=1) + if len(n_bins) == 1: + n_bins = np.tile(n_bins, n_dims) + elif n_bins.shape != (n_dims,): + raise ValueError('n_bins must be either a scalar (same number ' + 'of bins for all dimensions) or ' + 'an array (number of bins for each ' + 'dimension).') + + # checking if None is in n_bins, otherwise a rather cryptic + # exception is thrown when calling np.zeros + # also testing for negative/null values + if np.any(np.equal(n_bins, None)) or np.any(n_bins <= 0): + raise ValueError('<n_bins> : only positive values allowed.') + + sample_type = sample.dtype + + n_elem = sample.size // n_dims + + if n_bins.prod(dtype=np.uint64) < 2**15: + lut_dtype = np.int16 + elif n_bins.prod(dtype=np.uint64) < 2**31: + lut_dtype = np.int32 + + else: + lut_dtype = np.int64 + + # allocating the output arrays + lut = np.zeros(n_elem, dtype=lut_dtype) + histo = np.zeros(n_bins, dtype=np.uint32) + + dtype = sample.dtype.newbyteorder("N") + sample_c = np.ascontiguousarray(sample.reshape((sample.size,)), + dtype=dtype) + + histo_range_c = np.ascontiguousarray(histo_range.reshape((histo_range.size,)), + dtype=histo_range.dtype.newbyteorder("N")) + + n_bins_c = np.ascontiguousarray(n_bins.reshape((n_bins.size,)), + dtype=np.int32) + + lut_c = np.ascontiguousarray(lut.reshape((lut.size,))) + histo_c = np.ascontiguousarray(histo.reshape((histo.size,)), + dtype=histo.dtype.newbyteorder('N')) + + rc = 0 + + try: + rc = _histogramnd_get_lut_fused(sample_c, + n_dims, + n_elem, + histo_range_c, + n_bins_c, + lut_c, + histo_c, + last_bin_closed) + except TypeError as ex: + raise TypeError('Type not supported - sample : {0}' + ''.format(sample_type)) + + if rc != 0: + raise Exception('histogramnd returned an error : {0}' + ''.format(rc)) + + edges = [] + histo_range = histo_range.reshape(-1) + for i_dim in range(n_dims): + dim_edges = np.zeros(n_bins[i_dim] + 1) + rng_min = histo_range[2 * i_dim] + rng_max = histo_range[2 * i_dim + 1] + dim_edges[:-1] = (rng_min + np.arange(n_bins[i_dim]) * + ((rng_max - rng_min) / n_bins[i_dim])) + dim_edges[-1] = rng_max + edges.append(dim_edges) + + return lut, histo, tuple(edges) + + +# ===================== +# ===================== + + +def histogramnd_from_lut(weights, + histo_lut, + histo=None, + weighted_histo=None, + shape=None, + dtype=None, + weight_min=None, + weight_max=None): + """ + dtype ignored if weighted_histo provided + """ + + if histo is None and weighted_histo is None: + if shape is None: + raise ValueError('At least one of the following parameters has to ' + 'be provided : <shape> or <histo> or ' + '<weighted_histo>') + + if shape is not None: + if histo is not None and list(histo.shape) != list(shape): + raise ValueError('The <shape> value does not match' + 'the <histo> shape.') + + if(weighted_histo is not None and + list(weighted_histo.shape) != list(shape)): + raise ValueError('The <shape> value does not match' + 'the <weighted_histo> shape.') + else: + if histo is not None: + shape = histo.shape + else: + shape = weighted_histo.shape + + if histo is not None: + if histo.dtype != np.uint32: + raise ValueError('Provided <histo> array doesn\'t have ' + 'the expected type ' + ': should be {0} instead of {1}.' + ''.format(np.uint32, histo.dtype)) + + if weighted_histo is not None: + if histo.shape != weighted_histo.shape: + raise ValueError('The <histo> shape does not match' + 'the <weighted_histo> shape.') + else: + histo = np.zeros(shape, dtype=np.uint32) + + w_dtype = weights.dtype + + if dtype is None: + if weighted_histo is None: + dtype = w_dtype + else: + dtype = weighted_histo.dtype + elif weighted_histo is not None: + if weighted_histo.dtype != dtype: + raise ValueError('Provided <dtype> and <weighted_histo>\'s dtype' + ' do not match.') + dtype = weighted_histo.dtype + + if weighted_histo is None: + weighted_histo = np.zeros(shape, dtype=dtype) + + if histo_lut.size != weights.size: + raise ValueError('The LUT and weights arrays must have the same ' + 'number of elements.') + + w_c = np.ascontiguousarray(weights.reshape((weights.size,)), + dtype=weights.dtype.newbyteorder('N')) + + h_c = np.ascontiguousarray(histo.reshape((histo.size,)), + dtype=histo.dtype.newbyteorder('N')) + + w_h_c = np.ascontiguousarray(weighted_histo.reshape((weighted_histo.size,)), + dtype=weighted_histo.dtype.newbyteorder('N')) # noqa + + h_lut_c = np.ascontiguousarray(histo_lut.reshape((histo_lut.size,)), + histo_lut.dtype.newbyteorder('N')) + + rc = 0 + + if weight_min is None: + weight_min = 0 + filt_min_weights = False + else: + filt_min_weights = True + + if weight_max is None: + weight_max = 0 + filt_max_weights = False + else: + filt_max_weights = True + + try: + _histogramnd_from_lut_fused(w_c, + h_lut_c, + h_c, + w_h_c, + weights.size, + filt_min_weights, + w_dtype.type(weight_min), + filt_max_weights, + w_dtype.type(weight_max)) + except TypeError as ex: + print(ex) + raise TypeError('Case not supported - weights:{0} ' + 'and histo:{1}.' + ''.format(weights.dtype, histo.dtype)) + + return histo, weighted_histo + + +# ===================== +# ===================== + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.initializedcheck(False) +@cython.nonecheck(False) +@cython.cdivision(True) +def _histogramnd_from_lut_fused(weights_t[:] i_weights, + lut_t[:] i_lut, + cnumpy.uint32_t[:] o_histo, + cumul_t[:] o_weighted_histo, + int i_n_elems, + bint i_filt_min_weights, + weights_t i_weight_min, + bint i_filt_max_weights, + weights_t i_weight_max): + with nogil: + for i in range(i_n_elems): + if (i_lut[i] >= 0): + if i_filt_min_weights and i_weights[i] < i_weight_min: + continue + if i_filt_max_weights and i_weights[i] > i_weight_max: + continue + o_histo[i_lut[i]] += 1 + o_weighted_histo[i_lut[i]] += <cumul_t>i_weights[i] # noqa + + +# ===================== +# ===================== + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.initializedcheck(False) +@cython.nonecheck(False) +@cython.cdivision(True) +def _histogramnd_get_lut_fused(sample_t[:] i_sample, + int i_n_dims, + int i_n_elems, + double[:] i_histo_range, + int[:] i_n_bins, + lut_t[:] o_lut, + cnumpy.uint32_t[:] o_histo, + bint last_bin_closed): + + cdef: + int i = 0 + long elem_idx = 0 + long max_idx = 0 + long lut_idx = -1 + + # computed bin index (i_sample -> grid) + long bin_idx = 0 + + sample_t elem_coord = 0 + + double[50] g_min + double[50] g_max + double[50] bins_range + + for i in range(i_n_dims): + g_min[i] = i_histo_range[2*i] + g_max[i] = i_histo_range[2*i+1] + bins_range[i] = g_max[i] - g_min[i] + + elem_idx = 0 - i_n_dims + max_idx = i_n_elems * i_n_dims - i_n_dims + + with nogil: + while elem_idx < max_idx: + elem_idx += i_n_dims + lut_idx += 1 + + bin_idx = 0 + + for i in range(i_n_dims): + elem_coord = i_sample[elem_idx+i] + # ===================== + # Element is rejected if any of the following is NOT true : + # 1. coordinate is >= than the minimum value + # 2. coordinate is <= than the maximum value + # 3. coordinate==maximum value and last_bin_closed is True + # ===================== + if elem_coord < g_min[i]: + bin_idx = -1 + break + + # Here we make the assumption that most of the time + # there will be more coordinates inside the grid interval + # (one test) + # than coordinates higher or equal to the max + # (two tests) + if elem_coord < g_max[i]: + bin_idx = <long>(bin_idx * i_n_bins[i] + # noqa + (((elem_coord - g_min[i]) * i_n_bins[i]) / + bins_range[i])) + else: + # if equal and the last bin is closed : + # put it in the last bin + # else : discard + if last_bin_closed and elem_coord == g_max[i]: + bin_idx = (bin_idx + 1) * i_n_bins[i] - 1 + else: + bin_idx = -1 + break + + o_lut[lut_idx] = bin_idx + if bin_idx >= 0: + o_histo[bin_idx] += 1 + + return 0 |