# coding: utf-8
# /*##########################################################################
# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""This module provides functions to read fabio images as an HDF5 file.

    >>> import silx.io.fabioh5
    >>> f = silx.io.fabioh5.File("foobar.edf")

.. note:: This module has a dependency on the `h5py` and `fabio` libraries,
    which are not mandatory dependencies for `silx`.

"""

import collections
import datetime
import logging
import numbers

import fabio
import numpy

from . import commonh5
from silx.third_party import six
from silx import version as silx_version

try:
    import h5py
except ImportError:
    # h5py is optional: it is only used to build a vlen unicode dtype
    h5py = None


_logger = logging.getLogger(__name__)


class FrameData(commonh5.LazyLoadableDataset):
    """Expose a cube of image from a Fabio file using `FabioReader` as
    cache."""

    def __init__(self, name, fabio_reader, parent=None):
        attrs = {"interpretation": "image"}
        commonh5.LazyLoadableDataset.__init__(self, name, parent, attrs=attrs)
        self.__fabio_reader = fabio_reader

    def _create_data(self):
        """Return the data provided (and cached) by the fabio reader."""
        return self.__fabio_reader.get_data()


class RawHeaderData(commonh5.LazyLoadableDataset):
    """Lazy loadable raw header"""

    def __init__(self, name, fabio_file, parent=None):
        commonh5.LazyLoadableDataset.__init__(self, name, parent)
        self.__fabio_file = fabio_file

    def _create_data(self):
        """Initialize hold data by merging all headers of each frames.

        Each frame header is serialized as "key: value" lines. The dtype of
        the resulting array is the most permissive one needed by any frame
        (void, then unicode, then bytes string).

        :rtype: numpy.ndarray
        """
        headers = []
        types = set([])
        for frame in range(self.__fabio_file.nframes):
            if self.__fabio_file.nframes == 1:
                header = self.__fabio_file.header
            else:
                header = self.__fabio_file.getframe(frame).header

            data = []
            for key, value in header.items():
                data.append("%s: %s" % (str(key), str(value)))

            data = "\n".join(data)
            try:
                line = data.encode("ascii")
                types.add(numpy.string_)
            except UnicodeEncodeError:
                try:
                    line = data.encode("utf-8")
                    types.add(numpy.unicode_)
                except UnicodeEncodeError:
                    # Fallback in void
                    line = numpy.void(data)
                    types.add(numpy.void)

            headers.append(line)

        if numpy.void in types:
            dtype = numpy.void
        elif numpy.unicode_ in types:
            dtype = numpy.unicode_
        else:
            dtype = numpy.string_

        if dtype == numpy.unicode_ and h5py is not None:
            # h5py only support vlen unicode
            dtype = h5py.special_dtype(vlen=six.text_type)

        return numpy.array(headers, dtype=dtype)


class MetadataGroup(commonh5.LazyLoadableGroup):
    """Abstract class for groups containing a reference to a fabio image.
    """

    def __init__(self, name, metadata_reader, kind, parent=None, attrs=None):
        commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs)
        self.__metadata_reader = metadata_reader
        self.__kind = kind

    def _create_child(self):
        """Create one dataset per metadata key of the configured kind."""
        keys = self.__metadata_reader.get_keys(self.__kind)
        for name in keys:
            data = self.__metadata_reader.get_value(self.__kind, name)
            dataset = commonh5.Dataset(name, data)
            self.add_node(dataset)

    @property
    def _metadata_reader(self):
        return self.__metadata_reader


class DetectorGroup(commonh5.LazyLoadableGroup):
    """Define the detector group (sub group of instrument) using Fabio data.
    """

    def __init__(self, name, fabio_reader, parent=None, attrs=None):
        if attrs is None:
            attrs = {"NX_class": "NXdetector"}
        commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Create the `data` dataset and the `others` metadata group."""
        data = FrameData("data", self.__fabio_reader)
        self.add_node(data)

        # TODO we should add here Nexus informations we can extract from the
        # metadata

        others = MetadataGroup("others", self.__fabio_reader, kind=FabioReader.DEFAULT)
        self.add_node(others)


class ImageGroup(commonh5.LazyLoadableGroup):
    """Define the image group (sub group of measurement) using Fabio data.
    """

    def __init__(self, name, fabio_reader, parent=None, attrs=None):
        commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Create soft links to the detector group and to its data."""
        # The links are resolved from the grand-parent of this group
        basepath = self.parent.parent.name
        data = commonh5.SoftLink("data", path=basepath + "/instrument/detector_0/data")
        self.add_node(data)
        detector = commonh5.SoftLink("info", path=basepath + "/instrument/detector_0")
        self.add_node(detector)


class SampleGroup(commonh5.LazyLoadableGroup):
    """Define the sample group using Fabio data.
    """

    def __init__(self, name, fabio_reader, parent=None):
        # NOTE(review): every other group of this module uses the attribute
        # key "NX_class"; confirm whether "NXclass" is intended here.
        attrs = {"NXclass": "NXsample"}
        commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Create the unit cell and UB matrix datasets, if the reader
        provides a UB matrix."""
        if self.__fabio_reader.has_ub_matrix():
            scalar = {"interpretation": "scalar"}
            data = self.__fabio_reader.get_unit_cell_abc()
            data = commonh5.Dataset("unit_cell_abc", data, attrs=scalar)
            self.add_node(data)
            # unit_cell gathers abc (first 3 values) and the angles (last 3)
            unit_cell_data = numpy.zeros((1, 6), numpy.float32)
            unit_cell_data[0, :3] = data
            data = self.__fabio_reader.get_unit_cell_alphabetagamma()
            data = commonh5.Dataset("unit_cell_alphabetagamma", data, attrs=scalar)
            self.add_node(data)
            unit_cell_data[0, 3:] = data
            data = commonh5.Dataset("unit_cell", unit_cell_data, attrs=scalar)
            self.add_node(data)
            data = self.__fabio_reader.get_ub_matrix()
            data = commonh5.Dataset("ub_matrix", data, attrs=scalar)
            self.add_node(data)


class MeasurementGroup(commonh5.LazyLoadableGroup):
    """Define the measurement group for fabio file.
    """

    def __init__(self, name, fabio_reader, parent=None, attrs=None):
        commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs)
        self.__fabio_reader = fabio_reader

    def _create_child(self):
        """Create the image group and one dataset per counter.

        :raises Exception: If all names from `image_0` to `image_999` are
            already used by counters
        """
        keys = self.__fabio_reader.get_keys(FabioReader.COUNTER)

        # create image measurement but take care that no other metadata use
        # this name
        for i in range(1000):
            name = "image_%i" % i
            if name not in keys:
                data = ImageGroup(name, self.__fabio_reader)
                self.add_node(data)
                break
        else:
            raise Exception("image_i for 0..1000 already used")

        # add all counters
        for name in keys:
            data = self.__fabio_reader.get_value(FabioReader.COUNTER, name)
            dataset = commonh5.Dataset(name, data)
            self.add_node(dataset)


class FabioReader(object):
    """Class which read and cache data and metadata from a fabio image."""

    # Kinds of metadata which can be requested with get_keys/get_value
    DEFAULT = 0
    COUNTER = 1
    POSITIONER = 2

    def __init__(self, fabio_file):
        self.__fabio_file = fabio_file
        self.__counters = {}
        self.__positioners = {}
        self.__measurements = {}
        self.__key_filters = set([])
        self.__data = None
        self.__frame_count = self.__fabio_file.nframes
        self._read(self.__fabio_file)

    def fabio_file(self):
        """Returns the fabio file object given at the construction."""
        return self.__fabio_file

    def _create_data(self):
        """Initialize hold data by merging all frames into a single cube.

        Choose the cube size which fit the best the data. If some images are
        smaller than expected, the empty space is set to 0.

        The computation is cached into the class, and only done once.
        """
        images = []
        for frame in range(self.__fabio_file.nframes):
            if self.__fabio_file.nframes == 1:
                image = self.__fabio_file.data
            else:
                image = self.__fabio_file.getframe(frame).data
            images.append(image)

        # returns the data without extra dim in case of single frame
        if len(images) == 1:
            return images[0]

        # get the max size
        max_dim = max([i.ndim for i in images])
        max_shape = [0] * max_dim
        for image in images:
            for dim in range(image.ndim):
                if image.shape[dim] > max_shape[dim]:
                    max_shape[dim] = image.shape[dim]
        max_shape = tuple(max_shape)

        # fix smallest images
        for index, image in enumerate(images):
            if image.shape == max_shape:
                continue
            location = [slice(0, i) for i in image.shape]
            while len(location) < max_dim:
                location.append(0)
            normalized_image = numpy.zeros(max_shape, dtype=image.dtype)
            # A multi-dimensional index must be a tuple: a list of slices is
            # interpreted by numpy as a fancy index
            normalized_image[tuple(location)] = image
            images[index] = normalized_image

        # create a cube
        return numpy.array(images)

    def __get_dict(self, kind):
        """Returns the dictionary storing the metadata of an expected kind.

        :param int kind: One of `DEFAULT`, `COUNTER`, `POSITIONER`
        :rtype: dict
        :raises Exception: If the kind is unknown
        """
        if kind == self.DEFAULT:
            return self.__measurements
        elif kind == self.COUNTER:
            return self.__counters
        elif kind == self.POSITIONER:
            return self.__positioners
        else:
            raise Exception("Unexpected kind %s" % kind)

    def get_data(self):
        """Returns a cube from all available data from frames

        :rtype: numpy.ndarray
        """
        if self.__data is None:
            self.__data = self._create_data()
        return self.__data

    def get_keys(self, kind):
        """Get all available keys according to a kind of metadata.

        :rtype: list
        """
        # Copy into a real list (a dict view on Python 3) as documented
        return list(self.__get_dict(kind).keys())

    def get_value(self, kind, name):
        """Get a metadata value according to the kind and the name.

        The conversion to a numpy array is done on the first access, then
        the result is cached.

        :rtype: numpy.ndarray
        """
        value = self.__get_dict(kind)[name]
        if not isinstance(value, numpy.ndarray):
            value = self._convert_metadata_vector(value)
            self.__get_dict(kind)[name] = value
        return value

    def _set_counter_value(self, frame_id, name, value):
        """Set a counter metadata according to the frame id"""
        if name not in self.__counters:
            self.__counters[name] = [None] * self.__frame_count
        self.__counters[name][frame_id] = value

    def _set_positioner_value(self, frame_id, name, value):
        """Set a positioner metadata according to the frame id"""
        if name not in self.__positioners:
            self.__positioners[name] = [None] * self.__frame_count
        self.__positioners[name][frame_id] = value

    def _set_measurement_value(self, frame_id, name, value):
        """Set a measurement metadata according to the frame id"""
        if name not in self.__measurements:
            self.__measurements[name] = [None] * self.__frame_count
        self.__measurements[name][frame_id] = value

    def _read(self, fabio_file):
        """Read all metadata from the fabio file and store it into this
        object."""

        self.__key_filters.clear()
        if hasattr(fabio_file, "RESERVED_HEADER_KEYS"):
            # Provided in fabio 0.5
            for key in fabio_file.RESERVED_HEADER_KEYS:
                self.__key_filters.add(key.lower())

        for frame in range(fabio_file.nframes):
            if fabio_file.nframes == 1:
                header = fabio_file.header
            else:
                header = fabio_file.getframe(frame).header
            self._read_frame(frame, header)

    def _is_filtered_key(self, key):
        """
        If this function returns True, the :meth:`_read_key` will not be
        called with this `key` while reading the metadata frame.

        :param str key: A key of the metadata
        :rtype: bool
        """
        return key.lower() in self.__key_filters

    def _read_frame(self, frame_id, header):
        """Read all metadata from a frame and store it into this
        object."""
        for key, value in header.items():
            if self._is_filtered_key(key):
                continue
            self._read_key(frame_id, key, value)

    def _read_key(self, frame_id, name, value):
        """Read a key from the metadata and cache it into this object."""
        self._set_measurement_value(frame_id, name, value)

    def _convert_metadata_vector(self, values):
        """Convert a list of numpy data into a numpy array with the better
        fitting type.

        `None` elements (missing data) are replaced by a neutral value
        according to the resulting type. The input list is not modified.

        :param list values: One raw value per frame, or None if missing
        :rtype: numpy.ndarray
        """
        converted = []
        types = set([])
        has_none = False
        for v in values:
            if v is None:
                converted.append(None)
                has_none = True
            else:
                c = self._convert_value(v)
                converted.append(c)
                types.add(c.dtype)

        if has_none and len(types) == 0:
            # That's a list of none values
            return numpy.array([0] * len(values), numpy.int8)

        result_type = numpy.result_type(*types)

        if issubclass(result_type.type, numpy.string_):
            # use the raw data to create the array
            result = list(values)
        elif issubclass(result_type.type, numpy.unicode_):
            # use the raw data to create the array
            result = list(values)
        else:
            result = converted

        if has_none:
            # Fix missing data according to the array type
            if result_type.kind == "S":
                none_value = b""
            elif result_type.kind == "U":
                none_value = u""
            elif result_type.kind == "f":
                none_value = float("NaN")
            elif result_type.kind == "i":
                none_value = 0
            elif result_type.kind == "u":
                none_value = 0
            elif result_type.kind == "b":
                none_value = False
            else:
                none_value = None

            for index, r in enumerate(result):
                if r is not None:
                    continue
                result[index] = none_value

        return numpy.array(result, dtype=result_type)

    def _convert_value(self, value):
        """Convert a string into a numpy object (scalar or array).

        The value is most of the time a string, but it can be python object
        in case if TIFF decoder for example.
        """
        if isinstance(value, list):
            # convert to a numpy array
            return numpy.array(value)
        if isinstance(value, dict):
            # convert to a numpy associative array
            key_dtype = numpy.min_scalar_type(list(value.keys()))
            value_dtype = numpy.min_scalar_type(list(value.values()))
            associative_type = [('key', key_dtype), ('value', value_dtype)]
            assert key_dtype.kind != "O" and value_dtype.kind != "O"
            return numpy.array(list(value.items()), dtype=associative_type)
        if isinstance(value, numbers.Number):
            dtype = numpy.min_scalar_type(value)
            assert dtype.kind != "O"
            return dtype.type(value)

        if isinstance(value, six.binary_type):
            try:
                value = value.decode('utf-8')
            except UnicodeDecodeError:
                # Not a text: keep the raw bytes
                return numpy.void(value)

        if " " in value:
            result = self._convert_list(value)
        else:
            result = self._convert_scalar_value(value)
        return result

    def _convert_scalar_value(self, value):
        """Convert a string into a numpy int or float.

        If it is not possible it returns a numpy string.
        """
        try:
            value = int(value)
            dtype = numpy.min_scalar_type(value)
            assert dtype.kind != "O"
            return dtype.type(value)
        except ValueError:
            try:
                # numpy.min_scalar_type is not able to do very well the job
                # when there is a lot of digit after the dot
                # https://github.com/numpy/numpy/issues/8207
                # Let's count the digit of the string
                digits = len(value) - 1  # minus the dot
                if digits <= 7:
                    # A float32 is accurate with about 7 digits
                    return numpy.float32(value)
                elif digits <= 16:
                    # A float64 is accurate with about 16 digits
                    return numpy.float64(value)
                else:
                    if hasattr(numpy, "float128"):
                        return numpy.float128(value)
                    else:
                        return numpy.float64(value)
            except ValueError:
                return numpy.string_(value)

    def _convert_list(self, value):
        """Convert a string into a typed numpy array.

        If it is not possible it returns a numpy string.
        """
        try:
            numpy_values = []
            values = value.split(" ")
            types = set([])
            for string_value in values:
                v = self._convert_scalar_value(string_value)
                numpy_values.append(v)
                types.add(v.dtype.type)

            result_type = numpy.result_type(*types)

            if issubclass(result_type.type, (numpy.string_, six.binary_type)):
                # use the raw data to create the result
                return numpy.string_(value)
            elif issubclass(result_type.type, (numpy.unicode_, six.text_type)):
                # use the raw data to create the result
                return numpy.unicode_(value)
            else:
                return numpy.array(numpy_values, dtype=result_type)
        except ValueError:
            return numpy.string_(value)

    def has_sample_information(self):
        """Returns true if there is information about the sample in the
        file

        :rtype: bool
        """
        return self.has_ub_matrix()

    def has_ub_matrix(self):
        """Returns true if a UB matrix is available.

        :rtype: bool
        """
        return False


class EdfFabioReader(FabioReader):
    """Class which read and cache data and metadata from a fabio image.

    It is mostly the same as FabioReader, but counter_mne and
    motor_mne are parsed using a special way.
    """

    def __init__(self, fabio_file):
        FabioReader.__init__(self, fabio_file)
        self.__unit_cell_abc = None
        self.__unit_cell_alphabetagamma = None
        self.__ub_matrix = None

    def _read_frame(self, frame_id, header):
        """Overwrite the method to check and parse special keys: counter and
        motors keys."""
        # Keys parsed here, which must be skipped by the default reading
        self.__catch_keys = set([])
        if "motor_pos" in header and "motor_mne" in header:
            self.__catch_keys.add("motor_pos")
            self.__catch_keys.add("motor_mne")
            self._read_mnemonic_key(frame_id, "motor", header)
        if "counter_pos" in header and "counter_mne" in header:
            self.__catch_keys.add("counter_pos")
            self.__catch_keys.add("counter_mne")
            self._read_mnemonic_key(frame_id, "counter", header)
        FabioReader._read_frame(self, frame_id, header)

    def _is_filtered_key(self, key):
        """Returns True for the keys already parsed as mnemonics, else
        use the default filter."""
        if key in self.__catch_keys:
            return True
        return FabioReader._is_filtered_key(self, key)

    def _get_mnemonic_key(self, base_key, header):
        """Returns an ordered mapping from the mnemonics of `base_key + "_mne"`
        to the positions of `base_key + "_pos"`.

        A mnemonic without position is mapped to None. A position without
        mnemonic is skipped.

        :param str base_key: Prefix of the header keys to parse
        :param dict header: Header of a frame
        :rtype: collections.OrderedDict
        """
        mnemonic_values_key = base_key + "_mne"
        mnemonic_values = header.get(mnemonic_values_key, "")
        mnemonic_values = mnemonic_values.split()
        pos_values_key = base_key + "_pos"
        pos_values = header.get(pos_values_key, "")
        pos_values = pos_values.split()

        result = collections.OrderedDict()
        nbitems = max(len(mnemonic_values), len(pos_values))
        for i in range(nbitems):
            if i < len(mnemonic_values):
                mnemonic = mnemonic_values[i]
            else:
                # skip the element
                continue

            if i < len(pos_values):
                pos = pos_values[i]
            else:
                pos = None

            result[mnemonic] = pos
        return result

    def _read_mnemonic_key(self, frame_id, base_key, header):
        """Parse a mnemonic key"""
        is_counter = base_key == "counter"
        is_positioner = base_key == "motor"
        data = self._get_mnemonic_key(base_key, header)

        for mnemonic, pos in data.items():
            if is_counter:
                self._set_counter_value(frame_id, mnemonic, pos)
            elif is_positioner:
                self._set_positioner_value(frame_id, mnemonic, pos)
            else:
                raise Exception("State unexpected (base_key: %s)" % base_key)

    def has_ub_matrix(self):
        """Returns true if a UB matrix is available.

        :rtype: bool
        """
        header = self.fabio_file().header
        expected_keys = set(["UB_mne", "UB_pos", "sample_mne", "sample_pos"])
        return expected_keys.issubset(header)

    def parse_ub_matrix(self):
        """Parse the unit cell and the UB matrix from the header of the
        fabio file, and cache the result into this object."""
        header = self.fabio_file().header
        ub_data = self._get_mnemonic_key("UB", header)
        s_data = self._get_mnemonic_key("sample", header)
        if len(ub_data) > 9:
            _logger.warning("UB_mne and UB_pos contains more than expected keys.")
        if len(s_data) > 6:
            _logger.warning("sample_mne and sample_pos contains more than expected keys.")

        data = numpy.array([s_data["U0"], s_data["U1"], s_data["U2"]], dtype=float)
        unit_cell_abc = data

        data = numpy.array([s_data["U3"], s_data["U4"], s_data["U5"]], dtype=float)
        unit_cell_alphabetagamma = data

        ub_matrix = numpy.array([[
            [ub_data["UB0"], ub_data["UB1"], ub_data["UB2"]],
            [ub_data["UB3"], ub_data["UB4"], ub_data["UB5"]],
            [ub_data["UB6"], ub_data["UB7"], ub_data["UB8"]]]], dtype=float)

        self.__unit_cell_abc = unit_cell_abc
        self.__unit_cell_alphabetagamma = unit_cell_alphabetagamma
        self.__ub_matrix = ub_matrix

    def get_unit_cell_abc(self):
        """Get a numpy array data as defined for the dataset unit_cell_abc
        from the NXsample dataset.

        :rtype: numpy.ndarray
        """
        if self.__unit_cell_abc is None:
            self.parse_ub_matrix()
        return self.__unit_cell_abc

    def get_unit_cell_alphabetagamma(self):
        """Get a numpy array data as defined for the dataset
        unit_cell_alphabetagamma from the NXsample dataset.

        :rtype: numpy.ndarray
        """
        if self.__unit_cell_alphabetagamma is None:
            self.parse_ub_matrix()
        return self.__unit_cell_alphabetagamma

    def get_ub_matrix(self):
        """Get a numpy array data as defined for the dataset ub_matrix
        from the NXsample dataset.

        :rtype: numpy.ndarray
        """
        if self.__ub_matrix is None:
            self.parse_ub_matrix()
        return self.__ub_matrix


class File(commonh5.File):
    """Class which handle a fabio image as a mimick of a h5py.File.
    """

    def __init__(self, file_name=None, fabio_image=None):
        """Constructor

        Exactly one of `file_name` and `fabio_image` must be provided.

        :param str file_name: Name of a file to open with fabio
        :param fabio_image: An already opened fabio image
        :raises TypeError: If both or none of the parameters are provided
        """
        self.__must_be_closed = False
        if file_name is not None and fabio_image is not None:
            raise TypeError("Parameters file_name and fabio_image are mutually exclusive.")
        if file_name is not None:
            self.__fabio_image = fabio.open(file_name)
            self.__must_be_closed = True
        elif fabio_image is not None:
            self.__fabio_image = fabio_image
            file_name = self.__fabio_image.filename
        else:
            # Fail early with an explicit message instead of an
            # AttributeError on the missing image
            raise TypeError("One of the parameters file_name or fabio_image is required.")

        attrs = {"NX_class": "NXroot",
                 "file_time": datetime.datetime.now().isoformat(),
                 "file_name": file_name,
                 "creator": "silx %s" % silx_version}
        commonh5.File.__init__(self, name=file_name, attrs=attrs)
        self.__fabio_reader = self.create_fabio_reader(self.__fabio_image)
        scan = self.create_scan_group(self.__fabio_image, self.__fabio_reader)
        self.add_node(scan)

    def create_scan_group(self, fabio_image, fabio_reader):
        """Factory to create the scan group.

        :param FabioImage fabio_image: A Fabio image
        :param FabioReader fabio_reader: A reader for the Fabio image
        :rtype: commonh5.Group
        """

        scan = commonh5.Group("scan_0", attrs={"NX_class": "NXentry"})
        instrument = commonh5.Group("instrument", attrs={"NX_class": "NXinstrument"})
        measurement = MeasurementGroup("measurement", fabio_reader, attrs={"NX_class": "NXcollection"})
        file_ = commonh5.Group("file", attrs={"NX_class": "NXcollection"})
        positioners = MetadataGroup("positioners", fabio_reader, FabioReader.POSITIONER, attrs={"NX_class": "NXpositioner"})
        raw_header = RawHeaderData("scan_header", fabio_image, self)
        detector = DetectorGroup("detector_0", fabio_reader)

        scan.add_node(instrument)
        instrument.add_node(positioners)
        instrument.add_node(file_)
        instrument.add_node(detector)
        file_.add_node(raw_header)
        scan.add_node(measurement)

        if fabio_reader.has_sample_information():
            sample = SampleGroup("sample", fabio_reader)
            scan.add_node(sample)

        return scan

    def create_fabio_reader(self, fabio_file):
        """Factory to create fabio reader.

        :rtype: FabioReader"""
        if isinstance(fabio_file, fabio.edfimage.EdfImage):
            metadata = EdfFabioReader(fabio_file)
        else:
            metadata = FabioReader(fabio_file)
        return metadata

    def close(self):
        """Close the object, and free up associated resources.

        The reference to the associated FabioImage is released whether the
        object was created from a filename or from a FabioImage.

        After calling this method, attempts to use the object may fail.
        """
        if self.__must_be_closed:
            # It looks like there is no close on FabioImage
            # self.__fabio_image.close()
            pass
        self.__fabio_image = None