# -*- coding: utf-8 -*-
#
#    Project: Azimuthal integration
#             https://github.com/silx-kit/pyFAI
#
#    Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
#
#    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#  .
#  The above copyright notice and this permission notice shall be included in
#  all copies or substantial portions of the Software.
#  .
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  THE SOFTWARE.

"""A module for performing the 1d, 2d and 3d median filter ...

The target is to mimic the signature of scipy.signal.medfilt and scipy.medfilt2

The first implementation targets 2D implementation where this operation is costly (~10s/2kx2k image)
"""
from __future__ import absolute_import, print_function, with_statement, division


__author__ = "Jerome Kieffer"
__license__ = "MIT"
__date__ = "12/09/2017"
__copyright__ = "2012-2017, ESRF, Grenoble"
__contact__ = "jerome.kieffer@esrf.fr"

import logging
import numpy
from collections import OrderedDict

from .common import pyopencl, kernel_workgroup_size
from .processing import EventDescription, OpenclProcessing, BufferDescription

if pyopencl:
    mf = pyopencl.mem_flags
else:
    raise ImportError("pyopencl is not installed")
logger = logging.getLogger(__name__)


class MedianFilter2D(OpenclProcessing):
    """A class for doing median filtering using OpenCL"""
    buffers = [
               BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY),
               BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
               BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
               ]
    kernel_files = ["preprocess.cl", "bitonic.cl", "medfilt.cl"]
    mapping = {numpy.int8: "s8_to_float",
               numpy.uint8: "u8_to_float",
               numpy.int16: "s16_to_float",
               numpy.uint16: "u16_to_float",
               numpy.uint32: "u32_to_float",
               numpy.int32: "s32_to_float"}

    def __init__(self, shape, kernel_size=(3, 3),
                 ctx=None, devicetype="all", platformid=None, deviceid=None,
                 block_size=None, profile=False
                 ):
        """Constructor of the OpenCL 2D median filtering class

        :param shape: shape of the images to treat
        :param kernel size: 2-tuple of odd values
        :param ctx: actual working context, left to None for automatic
                    initialization from device type or platformid/deviceid
        :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
        :param platformid: integer with the platform_identifier, as given by clinfo
        :param deviceid: Integer with the device identifier, as given by clinfo
        :param block_size: preferred workgroup size, may vary depending on the outpcome of the compilation
        :param profile: switch on profiling to be able to profile at the kernel level,
                        store profiling elements (makes code slightly slower)
        """
        OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
                                  platformid=platformid, deviceid=deviceid,
                                  block_size=block_size, profile=profile)
        self.shape = shape
        self.size = self.shape[0] * self.shape[1]
        self.kernel_size = self.calc_kernel_size(kernel_size)
        self.workgroup_size = (self.calc_wg(self.kernel_size), 1)  # 3D kernel
        self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
                        for i in self.__class__.buffers]

        self.allocate_buffers()
        self.local_mem = self._get_local_mem(self.workgroup_size[0])
        OpenclProcessing.compile_kernels(self, self.kernel_files, "-D NIMAGE=%i" % self.size)
        self.set_kernel_arguments()

    def set_kernel_arguments(self):
        """Parametrize all kernel arguments
        """
        for val in self.mapping.values():
            self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("image_raw", "image")))
        self.cl_kernel_args["medfilt2d"] = OrderedDict((("image", self.cl_mem["image"]),
                                                        ("result", self.cl_mem["result"]),
                                                        ("local", self.local_mem),
                                                        ("khs1", numpy.int32(self.kernel_size[0] // 2)),  # Kernel half-size along dim1 (lines)
                                                        ("khs2", numpy.int32(self.kernel_size[1] // 2)),  # Kernel half-size along dim2 (columns)
                                                        ("height", numpy.int32(self.shape[0])),  # Image size along dim1 (lines)
                                                        ("width", numpy.int32(self.shape[1]))))
#                                                         ('debug', self.cl_mem["debug"])))  # Image size along dim2 (columns))

    def _get_local_mem(self, wg):
        return pyopencl.LocalMemory(wg * 32)  # 4byte per float, 8 element per thread

    def send_buffer(self, data, dest):
        """Send a numpy array to the device, including the cast on the device if possible

        :param data: numpy array with data
        :param dest: name of the buffer as registered in the class
        """

        dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
        events = []
        if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
            copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type))
            events.append(EventDescription("copy H->D %s" % dest, copy_image))
        else:
            copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data))
            kernel = getattr(self.program, self.mapping[data.dtype.type])
            cast_to_float = kernel(self.queue, (self.size,), None, self.cl_mem["image_raw"], self.cl_mem[dest])
            events += [EventDescription("copy H->D %s" % dest, copy_image), EventDescription("cast to float", cast_to_float)]
        if self.profile:
            self.events += events

    def calc_wg(self, kernel_size):
        """calculate and return the optimal workgroup size for the first dimension, taking into account
        the 8-height band

        :param kernel_size: 2-tuple of int, shape of the median window
        :return: optimal workgroup size
        """
        needed_threads = ((kernel_size[0] + 7) // 8) * kernel_size[1]
        if needed_threads < 8:
            wg = 8
        elif needed_threads < 32:
            wg = 32
        else:
            wg = 1 << (int(needed_threads).bit_length())
        return wg

    def medfilt2d(self, image, kernel_size=None):
        """Actually apply the median filtering on the image

        :param image: numpy array with the image
        :param kernel_size: 2-tuple if
        :return: median-filtered  2D image


        Nota: for window size 1x1 -> 7x7     up to 49  /  64 elements in   8 threads, 8elt/th
                              9x9 -> 15x15   up to 225 / 256 elements in  32 threads, 8elt/th
                              17x17 -> 21x21 up to 441 / 512 elements in  64 threads, 8elt/th

        TODO: change window size on the fly,


        """
        events = []
        if kernel_size is None:
            kernel_size = self.kernel_size
        else:
            kernel_size = self.calc_kernel_size(kernel_size)
        kernel_half_size = kernel_size // numpy.int32(2)
        # this is the workgroup size
        wg = self.calc_wg(kernel_size)

        # check for valid work group size:
        amws = kernel_workgroup_size(self.program, "medfilt2d")
        logger.warning("max actual workgroup size: %s, expected: %s", amws, wg)
        if wg > amws:
            raise RuntimeError("Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws))

        localmem = self._get_local_mem(wg)

        assert image.ndim == 2, "Treat only 2D images"
        assert image.shape[0] <= self.shape[0], "height is OK"
        assert image.shape[1] <= self.shape[1], "width is OK"

        with self.sem:
            self.send_buffer(image, "image")

            kwargs = self.cl_kernel_args["medfilt2d"]
            kwargs["local"] = localmem
            kwargs["khs1"] = kernel_half_size[0]
            kwargs["khs2"] = kernel_half_size[1]
            kwargs["height"] = numpy.int32(image.shape[0])
            kwargs["width"] = numpy.int32(image.shape[1])
#             for k, v in kwargs.items():
#                 print("%s: %s (%s)" % (k, v, type(v)))
            mf2d = self.kernels.medfilt2d(self.queue,
                                          (wg, image.shape[1]),
                                          (wg, 1), *list(kwargs.values()))
            events.append(EventDescription("median filter 2d", mf2d))

            result = numpy.empty(image.shape, numpy.float32)
            ev = pyopencl.enqueue_copy(self.queue, result, self.cl_mem["result"])
            events.append(EventDescription("copy D->H result", ev))
            ev.wait()
        if self.profile:
            self.events += events
        return result
    __call__ = medfilt2d

    @staticmethod
    def calc_kernel_size(kernel_size):
        """format the kernel size to be a 2-length numpy array of int32
        """
        kernel_size = numpy.asarray(kernel_size, dtype=numpy.int32)
        if kernel_size.shape == ():
            kernel_size = numpy.repeat(kernel_size.item(), 2).astype(numpy.int32)
        for size in kernel_size:
            if (size % 2) != 1:
                raise ValueError("Each element of kernel_size should be odd.")
        return kernel_size


class _MedFilt2d(object):
    median_filter = None

    @classmethod
    def medfilt2d(cls, ary, kernel_size=3):
        """Median filter a 2-dimensional array.

        Apply a median filter to the `input` array using a local window-size
        given by `kernel_size` (must be odd).

        :param ary: A 2-dimensional input array.
        :param kernel_size: A scalar or a list of length 2, giving the size of the
                            median filter window in each dimension.  Elements of
                            `kernel_size` should be odd.  If `kernel_size` is a scalar,
                            then this scalar is used as the size in each dimension.
                            Default is a kernel of size (3, 3).
        :return: An array the same size as input containing the median filtered
                result. always work on float32 values

        About the padding:

        * The filling mode in scipy.signal.medfilt2d is zero-padding
        * This implementation is equivalent to:
            scipy.ndimage.filters.median_filter(ary, kernel_size, mode="nearest")

        """
        image = numpy.atleast_2d(ary)
        shape = numpy.array(image.shape)
        if cls.median_filter is None:
            cls.median_filter = MedianFilter2D(image.shape, kernel_size)
        elif (numpy.array(cls.median_filter.shape) < shape).any():
            # enlarger the buffer size
            new_shape = numpy.maximum(numpy.array(cls.median_filter.shape), shape)
            ctx = cls.median_filter.ctx
            cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx)
        return cls.median_filter.medfilt2d(image, kernel_size=kernel_size)

medfilt2d = _MedFilt2d.medfilt2d