# coding: utf-8
# /*##########################################################################
# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""This module provides functions to read fabio images as an HDF5 file.

    >>> import silx.io.fabioh5
    >>> f = silx.io.fabioh5.File("foobar.edf")

.. note:: This module has a dependency on the `h5py <http://www.h5py.org/>`_
    and `fabio <https://github.com/silx-kit/fabio>`_ libraries,
    which are not mandatory dependencies for `silx`.

"""

import collections
import datetime
import logging
import numbers

import fabio
import numpy

from . import commonh5
from silx.third_party import six
from silx import version as silx_version

try:
    import h5py
except ImportError as e:
    h5py = None


_logger = logging.getLogger(__name__)


class FrameData(commonh5.LazyLoadableDataset):
    """Dataset exposing the images of a Fabio file as a cube.

    The content is provided by a `FabioReader`, which is used as cache.
    """

    def __init__(self, name, fabio_reader, parent=None):
        """
        :param str name: Name of this dataset
        :param FabioReader fabio_reader: Reader providing the frames
        :param parent: Parent node of this dataset, if any
        """
        super(FrameData, self).__init__(
            name, parent, attrs={"interpretation": "image"})
        self.__fabio_reader = fabio_reader

    def _create_data(self):
        """Returns the data as provided by `FabioReader.get_data`."""
        reader = self.__fabio_reader
        return reader.get_data()


class RawHeaderData(commonh5.LazyLoadableDataset):
    """Lazy loadable dataset containing the raw header of each frame."""

    def __init__(self, name, fabio_file, parent=None):
        """
        :param str name: Name of this dataset
        :param fabio_file: Fabio image providing `nframes`, `header` and
            `getframe`
        :param parent: Parent node of this dataset, if any
        """
        commonh5.LazyLoadableDataset.__init__(self, name, parent)
        self.__fabio_file = fabio_file

    def _create_data(self):
        """Initialize hold data by merging all headers of each frames.

        The header of each frame is serialized as ``key: value`` lines joined
        with a newline. The result contains one element per frame, stored as
        bytes if every header is ASCII, else as unicode.

        :rtype: numpy.ndarray
        """
        fabio_file = self.__fabio_file
        nframes = fabio_file.nframes

        headers = []
        needs_unicode = False
        needs_void = False
        for frame in range(nframes):
            if nframes == 1:
                header = fabio_file.header
            else:
                header = fabio_file.getframe(frame).header

            data = "\n".join(
                "%s: %s" % (str(key), str(value))
                for key, value in header.items())
            try:
                line = data.encode("ascii")
            except UnicodeEncodeError:
                try:
                    line = data.encode("utf-8")
                    needs_unicode = True
                except UnicodeEncodeError:
                    # Fallback in void
                    # NOTE(review): numpy.void may not accept a text string;
                    # confirm this fallback with a header which is not
                    # encodable as UTF-8.
                    line = numpy.void(data)
                    needs_void = True

            headers.append(line)

        # numpy.string_ and numpy.unicode_ were removed from NumPy 2.0: the
        # kind of the result is tracked with flags and expressed with names
        # available in every NumPy version.
        if needs_void:
            dtype = numpy.void
        elif needs_unicode:
            if h5py is not None:
                # h5py only support vlen unicode
                dtype = h5py.special_dtype(vlen=six.text_type)
            else:
                # A numpy unicode array can't be created from UTF-8 encoded
                # bytes: provide the text itself.
                headers = [line.decode("utf-8") for line in headers]
                dtype = numpy.dtype("U")
        else:
            dtype = numpy.bytes_

        return numpy.array(headers, dtype=dtype)


class MetadataGroup(commonh5.LazyLoadableGroup):
    """Abstract class for groups containing a reference to a fabio image.

    The children of the group are the metadata of a single kind, as provided
    by the metadata reader.
    """

    def __init__(self, name, metadata_reader, kind, parent=None, attrs=None):
        """
        :param str name: Name of this group
        :param metadata_reader: Object providing `get_keys` and `get_value`
        :param kind: Kind of metadata exposed by this group
        :param parent: Parent node of this group, if any
        :param attrs: Attributes of this group, if any
        """
        super(MetadataGroup, self).__init__(name, parent, attrs)
        self.__kind = kind
        self.__metadata_reader = metadata_reader

    def _create_child(self):
        """Add a dataset for each key the reader provides for this kind."""
        reader = self.__metadata_reader
        kind = self.__kind
        for key in reader.get_keys(kind):
            content = reader.get_value(kind, key)
            self.add_node(commonh5.Dataset(key, content))

    @property
    def _metadata_reader(self):
        """Reader used to feed this group."""
        return self.__metadata_reader


class DetectorGroup(commonh5.LazyLoadableGroup):
    """Detector group (sub group of instrument) filled using Fabio data.
    """

    def __init__(self, name, fabio_reader, parent=None, attrs=None):
        """
        :param str name: Name of this group
        :param FabioReader fabio_reader: Reader providing data and metadata
        :param parent: Parent node of this group, if any
        :param attrs: Attributes of this group. If None, the group is tagged
            as `NXdetector`.
        """
        group_attrs = {"NX_class": "NXdetector"} if attrs is None else attrs
        super(DetectorGroup, self).__init__(name, parent, group_attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Add the `data` dataset and the `others` metadata group."""
        reader = self.__fabio_reader
        self.add_node(FrameData("data", reader))

        # TODO we should add here Nexus informations we can extract from the
        # metadata

        self.add_node(
            MetadataGroup("others", reader, kind=FabioReader.DEFAULT))


class ImageGroup(commonh5.LazyLoadableGroup):
    """Image group (sub group of measurement) filled using Fabio data.

    It only contains soft links to the first detector of the instrument.
    """

    def __init__(self, name, fabio_reader, parent=None, attrs=None):
        """
        :param str name: Name of this group
        :param FabioReader fabio_reader: Reader of the Fabio image
        :param parent: Parent node of this group, if any
        :param attrs: Attributes of this group, if any
        """
        super(ImageGroup, self).__init__(name, parent, attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Add the `data` and `info` links. Targets are relative to the name
        of the grandparent of this group."""
        root = self.parent.parent.name
        links = (
            ("data", "/instrument/detector_0/data"),
            ("info", "/instrument/detector_0"),
        )
        for link_name, target in links:
            self.add_node(commonh5.SoftLink(link_name, path=root + target))


class SampleGroup(commonh5.LazyLoadableGroup):
    """Define the sample group (sub group of the scan) using Fabio data.
    """

    def __init__(self, name, fabio_reader, parent=None):
        """
        :param str name: Name of this group
        :param FabioReader fabio_reader: Reader providing the UB matrix and
            the unit cell
        :param parent: Parent node of this group, if any
        """
        # Use the same attribute name as the other groups of this module
        attrs = {"NX_class": "NXsample"}
        commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Add the unit cell and UB matrix datasets.

        Nothing is added if the reader does not provide any UB matrix.
        """
        reader = self.__fabio_reader
        if not reader.has_ub_matrix():
            return

        scalar = {"interpretation": "scalar"}

        abc = reader.get_unit_cell_abc()
        self.add_node(commonh5.Dataset("unit_cell_abc", abc, attrs=scalar))

        angles = reader.get_unit_cell_alphabetagamma()
        self.add_node(
            commonh5.Dataset("unit_cell_alphabetagamma", angles, attrs=scalar))

        # Merge a, b, c, alpha, beta, gamma in a single row. The raw arrays
        # are used, not the datasets wrapping them.
        unit_cell = numpy.zeros((1, 6), numpy.float32)
        unit_cell[0, :3] = abc
        unit_cell[0, 3:] = angles
        self.add_node(commonh5.Dataset("unit_cell", unit_cell, attrs=scalar))

        ub_matrix = reader.get_ub_matrix()
        self.add_node(commonh5.Dataset("ub_matrix", ub_matrix, attrs=scalar))


class MeasurementGroup(commonh5.LazyLoadableGroup):
    """Measurement group of a fabio file: one image group plus a dataset per
    counter.
    """

    def __init__(self, name, fabio_reader, parent=None, attrs=None):
        """
        :param str name: Name of this group
        :param FabioReader fabio_reader: Reader providing the counters
        :param parent: Parent node of this group, if any
        :param attrs: Attributes of this group, if any
        """
        super(MeasurementGroup, self).__init__(name, parent, attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Add the image group followed by a dataset for each counter.

        :raises Exception: If every name from `image_0` to `image_999` is
            already used by a counter
        """
        reader = self.__fabio_reader
        counters = reader.get_keys(FabioReader.COUNTER)

        # the image group must not hide a counter: pick the first free name
        candidates = ("image_%i" % index for index in range(1000))
        image_name = next(
            (candidate for candidate in candidates
             if candidate not in counters),
            None)
        if image_name is None:
            raise Exception("image_i for 0..1000 already used")
        self.add_node(ImageGroup(image_name, reader))

        # then expose all the counters
        for counter in counters:
            content = reader.get_value(FabioReader.COUNTER, counter)
            self.add_node(commonh5.Dataset(counter, content))


class FabioReader(object):
    """Class which read and cache data and metadata from a fabio image.

    Metadata are stored by kind (:attr:`DEFAULT`, :attr:`COUNTER`,
    :attr:`POSITIONER`). Each key holds one value per frame, which is
    converted into a numpy array the first time it is requested with
    :meth:`get_value`.
    """

    DEFAULT = 0
    COUNTER = 1
    POSITIONER = 2

    def __init__(self, fabio_file):
        """
        :param fabio_file: Fabio image providing `nframes`, `header`, `data`
            and `getframe`
        """
        self.__fabio_file = fabio_file
        self.__counters = {}
        self.__positioners = {}
        self.__measurements = {}
        self.__key_filters = set()
        self.__data = None
        self.__frame_count = self.__fabio_file.nframes
        self._read(self.__fabio_file)

    def fabio_file(self):
        """Returns the fabio image read by this object."""
        return self.__fabio_file

    def _create_data(self):
        """Initialize hold data by merging all frames into a single cube.

        Choose the cube size which fit the best the data. If some images are
        smaller than expected, the empty space is set to 0.

        The result is cached by :meth:`get_data`, this computation is only
        done once.

        :rtype: numpy.ndarray
        """
        fabio_file = self.__fabio_file
        nframes = fabio_file.nframes

        # returns the data without extra dim in case of single frame
        if nframes == 1:
            return fabio_file.data

        images = [fabio_file.getframe(index).data for index in range(nframes)]

        # get the max size
        max_dim = max(image.ndim for image in images)
        max_shape = [0] * max_dim
        for image in images:
            for dim, size in enumerate(image.shape):
                if size > max_shape[dim]:
                    max_shape[dim] = size
        max_shape = tuple(max_shape)

        # fix smallest images
        for index, image in enumerate(images):
            if image.shape == max_shape:
                continue
            location = [slice(0, size) for size in image.shape]
            location.extend([0] * (max_dim - len(location)))
            normalized_image = numpy.zeros(max_shape, dtype=image.dtype)
            # A multidimensional index must be a tuple: recent numpy no more
            # accepts a list of slices.
            normalized_image[tuple(location)] = image
            images[index] = normalized_image

        # create a cube
        return numpy.array(images)

    def __get_dict(self, kind):
        """Returns the dictionary storing the metadata of an expected kind.

        :raises Exception: If the kind is unknown
        """
        if kind == self.DEFAULT:
            return self.__measurements
        elif kind == self.COUNTER:
            return self.__counters
        elif kind == self.POSITIONER:
            return self.__positioners
        else:
            raise Exception("Unexpected kind %s" % kind)

    def get_data(self):
        """Returns a cube from all available data from frames

        :rtype: numpy.ndarray
        """
        if self.__data is None:
            self.__data = self._create_data()
        return self.__data

    def get_keys(self, kind):
        """Get all available keys according to a kind of metadata.

        :rtype: list
        """
        return list(self.__get_dict(kind))

    def get_value(self, kind, name):
        """Get a metadata value according to the kind and the name.

        The values of the frames are converted into a numpy array at the
        first request. The array is then stored in place of the raw values.

        :rtype: numpy.ndarray
        """
        store = self.__get_dict(kind)
        value = store[name]
        if not isinstance(value, numpy.ndarray):
            value = self._convert_metadata_vector(value)
            store[name] = value
        return value

    def __store_value(self, store, frame_id, name, value):
        """Set the value of `name` for a frame into the dictionary `store`.

        A list with one slot per frame is created the first time a name is
        used.
        """
        if name not in store:
            store[name] = [None] * self.__frame_count
        store[name][frame_id] = value

    def _set_counter_value(self, frame_id, name, value):
        """Set a counter metadata according to the frame id"""
        self.__store_value(self.__counters, frame_id, name, value)

    def _set_positioner_value(self, frame_id, name, value):
        """Set a positioner metadata according to the frame id"""
        self.__store_value(self.__positioners, frame_id, name, value)

    def _set_measurement_value(self, frame_id, name, value):
        """Set a measurement metadata according to the frame id"""
        self.__store_value(self.__measurements, frame_id, name, value)

    def _read(self, fabio_file):
        """Read all metadata from the fabio file and store it into this
        object."""

        self.__key_filters.clear()
        if hasattr(fabio_file, "RESERVED_HEADER_KEYS"):
            # Provided in fabio 0.5
            for key in fabio_file.RESERVED_HEADER_KEYS:
                self.__key_filters.add(key.lower())

        nframes = fabio_file.nframes
        for frame in range(nframes):
            if nframes == 1:
                header = fabio_file.header
            else:
                header = fabio_file.getframe(frame).header
            self._read_frame(frame, header)

    def _is_filtered_key(self, key):
        """
        If this function returns True, the :meth:`_read_key` will not be
        called with this `key` while reading the metadata frame.

        :param str key: A key of the metadata
        :rtype: bool
        """
        return key.lower() in self.__key_filters

    def _read_frame(self, frame_id, header):
        """Read all metadata from a frame and store it into this
        object."""
        for key, value in header.items():
            if self._is_filtered_key(key):
                continue
            self._read_key(frame_id, key, value)

    def _read_key(self, frame_id, name, value):
        """Read a key from the metadata and cache it into this object."""
        self._set_measurement_value(frame_id, name, value)

    def _convert_metadata_vector(self, values):
        """Convert a list of raw values (one per frame) into a numpy array
        with the better fitting type.

        Missing values (`None`) are replaced by a neutral value according to
        the type of the result: an empty string, NaN, 0 or False.

        :param list values: Raw values, `None` where a frame has no value
        :rtype: numpy.ndarray
        """
        converted = []
        types = set()
        has_none = False
        for v in values:
            if v is None:
                converted.append(None)
                has_none = True
            else:
                c = self._convert_value(v)
                converted.append(c)
                types.add(c.dtype)

        if not types:
            # That's a list of none values (or an empty list)
            return numpy.array([0] * len(values), numpy.int8)

        try:
            result_type = numpy.result_type(*types)
        except TypeError:
            # Recent numpy refuses to promote numbers with strings, while
            # older versions promoted them to a string type. Keep providing
            # such mixed values as strings.
            kinds = set(dtype.kind for dtype in types)
            if "U" in kinds:
                result_type = numpy.dtype("U")
            elif "S" in kinds:
                result_type = numpy.dtype("S")
            else:
                raise

        if result_type.kind in ("S", "U"):
            # use the raw data to create the array
            # (a copy, to avoid to edit the list owned by the caller)
            result = list(values)
        else:
            result = converted

        if has_none:
            # Fix missing data according to the array type
            # (the numpy.float, numpy.int and numpy.bool aliases of the
            # builtins were removed from numpy 1.24)
            none_values = {
                "S": b"",
                "U": u"",
                "f": float("NaN"),
                "i": 0,
                "u": 0,
                "b": False,
            }
            none_value = none_values.get(result_type.kind)
            result = [none_value if r is None else r for r in result]

        return numpy.array(result, dtype=result_type)

    def _convert_value(self, value):
        """Convert a string into a numpy object (scalar or array).

        The value is most of the time a string, but it can be python object
        in case of TIFF decoder for example.

        :raises TypeError: If a number or a dictionary can't be stored using
            a native numpy type
        """
        if isinstance(value, list):
            # convert to a numpy array
            return numpy.array(value)
        if isinstance(value, dict):
            # convert to a numpy associative array
            key_dtype = numpy.min_scalar_type(list(value.keys()))
            value_dtype = numpy.min_scalar_type(list(value.values()))
            if key_dtype.kind == "O" or value_dtype.kind == "O":
                raise TypeError("Dictionary can't be stored as a numpy array")
            associative_type = [('key', key_dtype), ('value', value_dtype)]
            return numpy.array(list(value.items()), dtype=associative_type)
        if isinstance(value, numbers.Number):
            dtype = numpy.min_scalar_type(value)
            if dtype.kind == "O":
                raise TypeError("Number can't be stored as a numpy scalar")
            return dtype.type(value)

        # bytes is the binary type for both Python 2 and Python 3
        if isinstance(value, bytes):
            try:
                value = value.decode('utf-8')
            except UnicodeDecodeError:
                return numpy.void(value)

        if " " in value:
            result = self._convert_list(value)
        else:
            result = self._convert_scalar_value(value)
        return result

    def _convert_scalar_value(self, value):
        """Convert a string into a numpy int or float.

        If it is not possible it returns a numpy string.
        """
        try:
            integer = int(value)
        except ValueError:
            pass
        else:
            dtype = numpy.min_scalar_type(integer)
            if dtype.kind != "O":
                return dtype.type(integer)
            # The integer is too big for the numpy integer types: it is
            # converted below as a floating point value.

        try:
            # numpy.min_scalar_type is not able to do very well the job
            # when there is a lot of digit after the dot
            # https://github.com/numpy/numpy/issues/8207
            # Let's count the digit of the string
            digits = len(value) - 1  # minus the dot
            if digits <= 7:
                # A float32 is accurate with about 7 digits
                return numpy.float32(value)
            elif digits <= 16:
                # A float64 is accurate with about 16 digits
                return numpy.float64(value)
            elif hasattr(numpy, "float128"):
                return numpy.float128(value)
            else:
                return numpy.float64(value)
        except ValueError:
            # numpy.string_ was removed from numpy 2.0
            return numpy.bytes_(value)

    def _convert_list(self, value):
        """Convert a string into a typed numpy array.

        The string is split on the space character and every item is
        converted with :meth:`_convert_scalar_value`.

        If it is not possible it returns a numpy string.
        """
        try:
            numpy_values = [
                self._convert_scalar_value(item) for item in value.split(" ")]
            types = set(v.dtype.type for v in numpy_values)
            result_type = numpy.result_type(*types)

            if result_type.kind == "S":
                # use the raw data to create the result
                return numpy.bytes_(value)
            elif result_type.kind == "U":
                # use the raw data to create the result
                return result_type.type(value)
            else:
                return numpy.array(numpy_values, dtype=result_type)
        except (ValueError, TypeError):
            # The TypeError is raised by recent numpy when numbers are mixed
            # with strings.
            return numpy.bytes_(value)

    def has_sample_information(self):
        """Returns true if there is information about the sample in the
        file

        :rtype: bool
        """
        return self.has_ub_matrix()

    def has_ub_matrix(self):
        """Returns true if a UB matrix is available.

        :rtype: bool
        """
        return False


class EdfFabioReader(FabioReader):
    """Reader caching data and metadata of a fabio EDF image.

    It mostly behaves like `FabioReader`, except that the `counter_mne` /
    `counter_pos` and `motor_mne` / `motor_pos` keys are parsed as lists of
    named values.
    """

    def __init__(self, fabio_file):
        super(EdfFabioReader, self).__init__(fabio_file)
        self.__unit_cell_abc = None
        self.__unit_cell_alphabetagamma = None
        self.__ub_matrix = None

    def _read_frame(self, frame_id, header):
        """Parse the counter and motor keys of the frame, then read the
        other keys like `FabioReader` does."""
        self.__catch_keys = set()
        mnemonic_keys = (
            ("motor", "motor_pos", "motor_mne"),
            ("counter", "counter_pos", "counter_mne"),
        )
        for base_key, pos_key, mne_key in mnemonic_keys:
            # both keys are needed to pair the names with the values
            if pos_key in header and mne_key in header:
                self.__catch_keys.update((pos_key, mne_key))
                self._read_mnemonic_key(frame_id, base_key, header)
        FabioReader._read_frame(self, frame_id, header)

    def _is_filtered_key(self, key):
        """Filter the keys already parsed as mnemonics in addition to the
        keys filtered by `FabioReader`."""
        caught = key in self.__catch_keys
        return caught or FabioReader._is_filtered_key(self, key)

    def _get_mnemonic_key(self, base_key, header):
        """Pair the names of `<base_key>_mne` with the values of
        `<base_key>_pos`.

        Both header values are split on whitespace. A name without value is
        mapped to None, a value without name is ignored.

        :rtype: collections.OrderedDict
        """
        names = header.get(base_key + "_mne", "").split()
        positions = header.get(base_key + "_pos", "").split()

        paired = collections.OrderedDict()
        for index, mnemonic in enumerate(names):
            has_position = index < len(positions)
            paired[mnemonic] = positions[index] if has_position else None
        return paired

    def _read_mnemonic_key(self, frame_id, base_key, header):
        """Parse a mnemonic key and store the result as counters (base key
        `counter`) or as positioners (base key `motor`)."""
        if base_key == "counter":
            store = self._set_counter_value
        elif base_key == "motor":
            store = self._set_positioner_value
        else:
            store = None

        for mnemonic, pos in self._get_mnemonic_key(base_key, header).items():
            if store is None:
                raise Exception("State unexpected (base_key: %s)" % base_key)
            store(frame_id, mnemonic, pos)

    def has_ub_matrix(self):
        """Returns true if a UB matrix is available.

        :rtype: bool
        """
        header = self.fabio_file().header
        required = ("UB_mne", "UB_pos", "sample_mne", "sample_pos")
        return all(key in header for key in required)

    def parse_ub_matrix(self):
        """Read the unit cell and the UB matrix from the `sample` and `UB`
        mnemonics of the header, and cache them into this object."""
        header = self.fabio_file().header
        ub_data = self._get_mnemonic_key("UB", header)
        s_data = self._get_mnemonic_key("sample", header)
        if len(ub_data) > 9:
            _logger.warning("UB_mne and UB_pos contains more than expected keys.")
        if len(s_data) > 6:
            _logger.warning("sample_mne and sample_pos contains more than expected keys.")

        abc = numpy.array(
            [s_data[key] for key in ("U0", "U1", "U2")], dtype=float)
        angles = numpy.array(
            [s_data[key] for key in ("U3", "U4", "U5")], dtype=float)

        ub_keys = (
            ("UB0", "UB1", "UB2"),
            ("UB3", "UB4", "UB5"),
            ("UB6", "UB7", "UB8"),
        )
        rows = [[ub_data[key] for key in row] for row in ub_keys]
        matrix = numpy.array([rows], dtype=float)

        self.__unit_cell_abc = abc
        self.__unit_cell_alphabetagamma = angles
        self.__ub_matrix = matrix

    def get_unit_cell_abc(self):
        """Get a numpy array data as defined for the dataset unit_cell_abc
        from the NXsample dataset.

        The header is parsed at the first request.

        :rtype: numpy.ndarray
        """
        if self.__unit_cell_abc is None:
            self.parse_ub_matrix()
        return self.__unit_cell_abc

    def get_unit_cell_alphabetagamma(self):
        """Get a numpy array data as defined for the dataset
        unit_cell_alphabetagamma from the NXsample dataset.

        The header is parsed at the first request.

        :rtype: numpy.ndarray
        """
        if self.__unit_cell_alphabetagamma is None:
            self.parse_ub_matrix()
        return self.__unit_cell_alphabetagamma

    def get_ub_matrix(self):
        """Get a numpy array data as defined for the dataset ub_matrix
        from the NXsample dataset.

        The header is parsed at the first request.

        :rtype: numpy.ndarray
        """
        if self.__ub_matrix is None:
            self.parse_ub_matrix()
        return self.__ub_matrix


class File(commonh5.File):
    """Class which handle a fabio image as a mimick of a h5py.File.
    """

    def __init__(self, file_name=None, fabio_image=None):
        """
        Exactly one of `file_name` and `fabio_image` must be provided.

        :param str file_name: Name of the file, which is opened with
            `fabio.open`
        :param fabio_image: An already loaded fabio image
        :raises TypeError: If both parameters, or none of them, are provided
        """
        self.__must_be_closed = False
        if file_name is not None and fabio_image is not None:
            raise TypeError("Parameters file_name and fabio_image are mutually exclusive.")
        if file_name is None and fabio_image is None:
            # Fail now with an explicit message instead of an AttributeError
            # on the missing image later
            raise TypeError("Parameter file_name or fabio_image is expected.")

        if file_name is not None:
            self.__fabio_image = fabio.open(file_name)
            # The image was opened here: this object is in charge of it
            self.__must_be_closed = True
        else:
            self.__fabio_image = fabio_image
            file_name = self.__fabio_image.filename

        attrs = {"NX_class": "NXroot",
                 "file_time": datetime.datetime.now().isoformat(),
                 "file_name": file_name,
                 "creator": "silx %s" % silx_version}
        commonh5.File.__init__(self, name=file_name, attrs=attrs)
        self.__fabio_reader = self.create_fabio_reader(self.__fabio_image)
        scan = self.create_scan_group(self.__fabio_image, self.__fabio_reader)
        self.add_node(scan)

    def create_scan_group(self, fabio_image, fabio_reader):
        """Factory to create the scan group.

        The scan contains an `instrument` group (positioners, raw header and
        detector), a `measurement` group, and a `sample` group if the reader
        provides information about the sample.

        :param FabioImage fabio_image: A Fabio image
        :param FabioReader fabio_reader: A reader for the Fabio image
        :rtype: commonh5.Group
        """

        scan = commonh5.Group("scan_0", attrs={"NX_class": "NXentry"})
        instrument = commonh5.Group("instrument", attrs={"NX_class": "NXinstrument"})
        measurement = MeasurementGroup("measurement", fabio_reader, attrs={"NX_class": "NXcollection"})
        file_ = commonh5.Group("file", attrs={"NX_class": "NXcollection"})
        positioners = MetadataGroup("positioners", fabio_reader, FabioReader.POSITIONER, attrs={"NX_class": "NXpositioner"})
        # NOTE(review): this file is given as parent while the node is added
        # to the `file` group below - confirm it is expected
        raw_header = RawHeaderData("scan_header", fabio_image, self)
        detector = DetectorGroup("detector_0", fabio_reader)

        scan.add_node(instrument)
        instrument.add_node(positioners)
        instrument.add_node(file_)
        instrument.add_node(detector)
        file_.add_node(raw_header)
        scan.add_node(measurement)

        if fabio_reader.has_sample_information():
            sample = SampleGroup("sample", fabio_reader)
            scan.add_node(sample)

        return scan

    def create_fabio_reader(self, fabio_file):
        """Factory to create fabio reader.

        An `EdfFabioReader` is used for EDF images, a `FabioReader` for
        any other image.

        :rtype: FabioReader"""
        if isinstance(fabio_file, fabio.edfimage.EdfImage):
            metadata = EdfFabioReader(fabio_file)
        else:
            metadata = FabioReader(fabio_file)
        return metadata

    def close(self):
        """Close the object, and free up associated resources.

        The associated FabioImage is closed only if it was opened by this
        object from a filename. An image provided by the caller is released
        but not closed.

        After calling this method, attempts to use the object may fail.
        """
        if self.__must_be_closed and self.__fabio_image is not None:
            # Not every version of fabio provides a close method on images
            close = getattr(self.__fabio_image, "close", None)
            if close is not None:
                close()
        self.__fabio_image = None