diff options
Diffstat (limited to 'src/silx/io')
53 files changed, 23174 insertions, 0 deletions
# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016-2018 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/
"""This package provides functionalities to read and write data files.

It is geared towards support of and conversion to HDF5/NeXus.

See silx documentation: http://www.silx.org/doc/silx/latest/
"""

__authors__ = ["P. Knobel"]
__license__ = "MIT"
__date__ = "11/12/2017"


from .utils import open  # pylint:disable=redefined-builtin
from .utils import save1D

from .utils import is_dataset
from .utils import is_file
from .utils import is_group
from .utils import is_softlink
from .utils import supported_extensions
from .utils import get_data

# Public API: every non-private name defined above, except `open`, which is
# deliberately excluded so that `from silx.io import *` does not shadow the
# `open` builtin.  (Replaces a Python-2-era chain of `filter()` calls that
# went through a throwaway `__all` temporary.)
__all__ = [_name for _name in dir()
           if not _name.startswith("_") and _name != "open"]
+# +# ############################################################################*/ +""" +This module contains generic objects, emulating *h5py* groups, datasets and +files. They are used in :mod:`spech5` and :mod:`fabioh5`. +""" +import collections +try: + from collections import abc +except ImportError: # Python2 support + import collections as abc +import weakref + +import h5py +import numpy + +from . import utils + +__authors__ = ["V. Valls", "P. Knobel"] +__license__ = "MIT" +__date__ = "02/07/2018" + + +class _MappingProxyType(abc.MutableMapping): + """Read-only dictionary + + This class is available since Python 3.3, but not on earlyer Python + versions. + """ + + def __init__(self, data): + self._data = data + + def __getitem__(self, key): + return self._data[key] + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + def get(self, key, default=None): + return self._data.get(key, default) + + def __setitem__(self, key, value): + raise RuntimeError("Cannot modify read-only dictionary") + + def __delitem__(self, key): + raise RuntimeError("Cannot modify read-only dictionary") + + def pop(self, key): + raise RuntimeError("Cannot modify read-only dictionary") + + def clear(self): + raise RuntimeError("Cannot modify read-only dictionary") + + def update(self, key, value): + raise RuntimeError("Cannot modify read-only dictionary") + + def setdefault(self, key): + raise RuntimeError("Cannot modify read-only dictionary") + + +class Node(object): + """This is the base class for all :mod:`spech5` and :mod:`fabioh5` + classes. It represents a tree node, and knows its parent node + (:attr:`parent`). + The API mimics a *h5py* node, with following attributes: :attr:`file`, + :attr:`attrs`, :attr:`name`, and :attr:`basename`. 
+ """ + + def __init__(self, name, parent=None, attrs=None): + self._set_parent(parent) + self.__basename = name + self.__attrs = {} + if attrs is not None: + self.__attrs.update(attrs) + + def _set_basename(self, name): + self.__basename = name + + @property + def h5_class(self): + """Returns the HDF5 class which is mimicked by this class. + + :rtype: H5Type + """ + raise NotImplementedError() + + @property + def h5py_class(self): + """Returns the h5py classes which is mimicked by this class. It can be + one of `h5py.File, h5py.Group` or `h5py.Dataset` + + This should not be used anymore. Prefer using `h5_class` + + :rtype: Class + """ + h5_class = self.h5_class + if h5_class == utils.H5Type.FILE: + return h5py.File + elif h5_class == utils.H5Type.GROUP: + return h5py.Group + elif h5_class == utils.H5Type.DATASET: + return h5py.Dataset + elif h5_class == utils.H5Type.SOFT_LINK: + return h5py.SoftLink + raise NotImplementedError() + + @property + def parent(self): + """Returns the parent of the node. + + :rtype: Node + """ + if self.__parent is None: + parent = None + else: + parent = self.__parent() + if parent is None: + self.__parent = None + return parent + + def _set_parent(self, parent): + """Set the parent of this node. + + It do not update the parent object. + + :param Node parent: New parent for this node + """ + if parent is not None: + self.__parent = weakref.ref(parent) + else: + self.__parent = None + + @property + def file(self): + """Returns the file node of this node. + + :rtype: Node + """ + node = self + while node.parent is not None: + node = node.parent + if isinstance(node, File): + return node + else: + return None + + @property + def attrs(self): + """Returns HDF5 attributes of this node. + + :rtype: dict + """ + if self._is_editable(): + return self.__attrs + else: + return _MappingProxyType(self.__attrs) + + @property + def name(self): + """Returns the HDF5 name of this node. 
+ """ + parent = self.parent + if parent is None: + return "/" + if parent.name == "/": + return "/" + self.basename + return parent.name + "/" + self.basename + + @property + def basename(self): + """Returns the HDF5 basename of this node. + """ + return self.__basename + + def _is_editable(self): + """Returns true if the file is editable or if the node is not linked + to a tree. + + :rtype: bool + """ + f = self.file + return f is None or f.mode == "w" + + +class Dataset(Node): + """This class handles a numpy data object, as a mimicry of a + *h5py.Dataset*. + """ + + def __init__(self, name, data, parent=None, attrs=None): + Node.__init__(self, name, parent, attrs=attrs) + if data is not None: + self._check_data(data) + self.__data = data + + def _check_data(self, data): + """Check that the data provided by the dataset is valid. + + It is valid when it can be stored in a HDF5 using h5py. + + :param numpy.ndarray data: Data associated to the dataset + :raises TypeError: In the case the data is not valid. + """ + if isinstance(data, (str, bytes)): + return + + chartype = data.dtype.char + if chartype == "U": + pass + elif chartype == "O": + d = h5py.special_dtype(vlen=data.dtype) + if d is not None: + return + d = h5py.special_dtype(ref=data.dtype) + if d is not None: + return + else: + return + + msg = "Type of the dataset '%s' is not supported. Found '%s'." + raise TypeError(msg % (self.name, data.dtype)) + + def _set_data(self, data): + """Set the data exposed by the dataset. + + It have to be called only one time before the data is used. It should + not be edited after use. + + :param numpy.ndarray data: Data associated to the dataset + """ + self._check_data(data) + self.__data = data + + def _get_data(self): + """Returns the exposed data + + :rtype: numpy.ndarray + """ + return self.__data + + @property + def h5_class(self): + """Returns the HDF5 class which is mimicked by this class. 
+ + :rtype: H5Type + """ + return utils.H5Type.DATASET + + @property + def dtype(self): + """Returns the numpy datatype exposed by this dataset. + + :rtype: numpy.dtype + """ + return self._get_data().dtype + + @property + def shape(self): + """Returns the shape of the data exposed by this dataset. + + :rtype: tuple + """ + if isinstance(self._get_data(), numpy.ndarray): + return self._get_data().shape + else: + return tuple() + + @property + def size(self): + """Returns the size of the data exposed by this dataset. + + :rtype: int + """ + if isinstance(self._get_data(), numpy.ndarray): + return self._get_data().size + else: + # It is returned as float64 1.0 by h5py + return numpy.float64(1.0) + + def __len__(self): + """Returns the size of the data exposed by this dataset. + + :rtype: int + """ + if isinstance(self._get_data(), numpy.ndarray): + return len(self._get_data()) + else: + # It is returned as float64 1.0 by h5py + raise TypeError("Attempt to take len() of scalar dataset") + + def __getitem__(self, item): + """Returns the slice of the data exposed by this dataset. + + :rtype: numpy.ndarray + """ + if not isinstance(self._get_data(), numpy.ndarray): + if item == Ellipsis: + return numpy.array(self._get_data()) + elif item == tuple(): + return self._get_data() + else: + raise ValueError("Scalar can only be reached with an ellipsis or an empty tuple") + return self._get_data().__getitem__(item) + + def __str__(self): + basename = self.name.split("/")[-1] + return '<HDF5-like dataset "%s": shape %s, type "%s">' % \ + (basename, self.shape, self.dtype.str) + + def __getslice__(self, i, j): + """Returns the slice of the data exposed by this dataset. + + Deprecated but still in use for python 2.7 + + :rtype: numpy.ndarray + """ + return self.__getitem__(slice(i, j, None)) + + @property + def value(self): + """Returns the data exposed by this dataset. + + Deprecated by h5py. It is prefered to use indexing `[()]`. 
+ + :rtype: numpy.ndarray + """ + return self._get_data() + + @property + def compression(self): + """Returns compression as provided by `h5py.Dataset`. + + There is no compression.""" + return None + + @property + def compression_opts(self): + """Returns compression options as provided by `h5py.Dataset`. + + There is no compression.""" + return None + + @property + def chunks(self): + """Returns chunks as provided by `h5py.Dataset`. + + There is no chunks.""" + return None + + @property + def is_virtual(self): + """Checks virtual data as provided by `h5py.Dataset`""" + return False + + def virtual_sources(self): + """Returns virtual dataset sources as provided by `h5py.Dataset`. + + :rtype: list""" + raise RuntimeError("Not a virtual dataset") + + @property + def external(self): + """Returns external sources as provided by `h5py.Dataset`. + + :rtype: list or None""" + return None + + def __array__(self, dtype=None): + # Special case for (0,)*-shape datasets + if numpy.product(self.shape) == 0: + return self[()] + else: + return numpy.array(self[...], dtype=self.dtype if dtype is None else dtype) + + def __iter__(self): + """Iterate over the first axis. 
TypeError if scalar.""" + if len(self.shape) == 0: + raise TypeError("Can't iterate over a scalar dataset") + return self._get_data().__iter__() + + # make comparisons and operations on the data + def __eq__(self, other): + """When comparing datasets, compare the actual data.""" + if utils.is_dataset(other): + return self[()] == other[()] + return self[()] == other + + def __add__(self, other): + return self[()] + other + + def __radd__(self, other): + return other + self[()] + + def __sub__(self, other): + return self[()] - other + + def __rsub__(self, other): + return other - self[()] + + def __mul__(self, other): + return self[()] * other + + def __rmul__(self, other): + return other * self[()] + + def __truediv__(self, other): + return self[()] / other + + def __rtruediv__(self, other): + return other / self[()] + + def __floordiv__(self, other): + return self[()] // other + + def __rfloordiv__(self, other): + return other // self[()] + + def __neg__(self): + return -self[()] + + def __abs__(self): + return abs(self[()]) + + def __float__(self): + return float(self[()]) + + def __int__(self): + return int(self[()]) + + def __bool__(self): + if self[()]: + return True + return False + + def __nonzero__(self): + # python 2 + return self.__bool__() + + def __ne__(self, other): + if utils.is_dataset(other): + return self[()] != other[()] + else: + return self[()] != other + + def __lt__(self, other): + if utils.is_dataset(other): + return self[()] < other[()] + else: + return self[()] < other + + def __le__(self, other): + if utils.is_dataset(other): + return self[()] <= other[()] + else: + return self[()] <= other + + def __gt__(self, other): + if utils.is_dataset(other): + return self[()] > other[()] + else: + return self[()] > other + + def __ge__(self, other): + if utils.is_dataset(other): + return self[()] >= other[()] + else: + return self[()] >= other + + def __getattr__(self, item): + """Proxy to underlying numpy array methods. 
+ """ + data = self._get_data() + if hasattr(data, item): + return getattr(data, item) + + raise AttributeError("Dataset has no attribute %s" % item) + + +class DatasetProxy(Dataset): + """Virtual dataset providing content of another dataset""" + + def __init__(self, name, target, parent=None): + Dataset.__init__(self, name, data=None, parent=parent) + if not utils.is_dataset(target): + raise TypeError("A Dataset is expected but %s found", target.__class__) + self.__target = target + + @property + def shape(self): + return self.__target.shape + + @property + def size(self): + return self.__target.size + + @property + def dtype(self): + return self.__target.dtype + + def _get_data(self): + return self.__target[...] + + @property + def attrs(self): + return self.__target.attrs + + +class _LinkToDataset(Dataset): + """Virtual dataset providing link to another dataset""" + + def __init__(self, name, target, parent=None): + Dataset.__init__(self, name, data=None, parent=parent) + self.__target = target + + def _get_data(self): + return self.__target._get_data() + + @property + def attrs(self): + return self.__target.attrs + + +class LazyLoadableDataset(Dataset): + """Abstract dataset which provides a lazy loading of the data. + + The class has to be inherited and the :meth:`_create_data` method has to be + implemented to return the numpy data exposed by the dataset. This factory + method is only called once, when the data is needed. + """ + + def __init__(self, name, parent=None, attrs=None): + super(LazyLoadableDataset, self).__init__(name, None, parent, attrs=attrs) + self._is_initialized = False + + def _create_data(self): + """ + Factory to create the data exposed by the dataset when it is needed. + + It has to be implemented for the class to work. + + :rtype: numpy.ndarray + """ + raise NotImplementedError() + + def _get_data(self): + """Returns the data exposed by the dataset. + + Overwrite Dataset method :meth:`_get_data` to implement the lazy + loading feature. 
class SoftLink(Node):
    """This class is a tree node that mimics a *h5py.Softlink*.

    In this implementation, the path to the target must be absolute.
    """
    def __init__(self, name, path, parent=None):
        assert str(path).startswith("/")  # TODO: h5py also allows a relative path

        Node.__init__(self, name, parent)

        # attr target defined for spech5 backward compatibility
        self.target = str(path)

    @property
    def h5_class(self):
        """Returns the HDF5 class which is mimicked by this class.

        :rtype: H5Type
        """
        return utils.H5Type.SOFT_LINK

    @property
    def path(self):
        """Soft link value. Not guaranteed to be a valid path."""
        return self.target


class Group(Node):
    """This class mimics a `h5py.Group`."""

    def __init__(self, name, parent=None, attrs=None):
        Node.__init__(self, name, parent, attrs=attrs)
        self.__items = collections.OrderedDict()

    def _get_items(self):
        """Returns the child items as a name-node dictionary.

        :rtype: dict
        """
        return self.__items

    def add_node(self, node):
        """Add a child to this group.

        :param Node node: Child to add to this group
        """
        self._get_items()[node.basename] = node
        node._set_parent(self)

    @property
    def h5_class(self):
        """Returns the HDF5 class which is mimicked by this class.

        :rtype: H5Type
        """
        return utils.H5Type.GROUP

    def _get(self, name, getlink):
        """If getlink is True and name points to an existing SoftLink, this
        SoftLink is returned. In all other situations, we try to return a
        Group or Dataset, or we raise a KeyError if we fail."""
        if "/" not in name:
            result = self._get_items()[name]
        elif name.startswith("/"):
            root = self.file
            if name == "/":
                return root
            result = root._get(name[1:], getlink)
        else:
            path = name.split("/")
            result = self
            for item_name in path:
                if isinstance(result, SoftLink):
                    # traverse links
                    l_name, l_target = result.name, result.path
                    result = result.file.get(l_target)
                    if result is None:
                        raise KeyError(
                            "Unable to open object (broken SoftLink %s -> %s)" %
                            (l_name, l_target))
                if not item_name:
                    # trailing "/" in name (legal for accessing Groups only)
                    if isinstance(result, Group):
                        continue
                if not isinstance(result, Group):
                    raise KeyError("Unable to open object (Component not found)")
                result = result._get_items()[item_name]

        if isinstance(result, SoftLink) and not getlink:
            link = result
            target = result.file.get(link.path)
            # BUG FIX: the original tested `result is None` here, which can
            # never be true (`result` is the SoftLink itself); the resolved
            # `target` is what may be None for a broken link.
            if target is None:
                msg = "Unable to open object (broken SoftLink %s -> %s)"
                raise KeyError(msg % (link.name, link.path))
            # Convert SoftLink into typed group/dataset
            if isinstance(target, Group):
                result = _LinkToGroup(name=link.basename, target=target, parent=link.parent)
            elif isinstance(target, Dataset):
                result = _LinkToDataset(name=link.basename, target=target, parent=link.parent)
            else:
                raise TypeError("Unexpected target type %s" % type(target))

        return result

    def get(self, name, default=None, getclass=False, getlink=False):
        """Retrieve an item or other information.

        If getlink only is true, the returned value is always `h5py.HardLink`,
        because this implementation do not use links. Like the original
        implementation.

        :param str name: name of the item
        :param object default: default value returned if the name is not found
        :param bool getclass: if true, the returned object is the class of the object found
        :param bool getlink: if true, links object are returned instead of the target
        :return: An object, else None
        :rtype: object
        """
        if name not in self:
            return default

        node = self._get(name, getlink=True)
        if isinstance(node, SoftLink) and not getlink:
            # get target
            try:
                node = self._get(name, getlink=False)
            except KeyError:
                return default
        elif not isinstance(node, SoftLink) and getlink:
            # ExternalLink objects don't exist in silx, so it must be a HardLink
            node = h5py.HardLink()

        if getclass:
            obj = utils.get_h5py_class(node)
            if obj is None:
                obj = node.__class__
        else:
            obj = node
        return obj

    def __setitem__(self, name, obj):
        """Add an object to the group.

        :param str name: Location on the group to store the object.
            This path name must not exists.
        :param object obj: Object to store on the file. According to the type,
            the behaviour will not be the same.

            - `commonh5.SoftLink`: Create the corresponding link.
            - `numpy.ndarray`: The array is converted to a dataset object.
            - `commonh5.Node`: A hard link should be created pointing to the
              given object. This implementation uses a soft link.
              If the node do not have parent it is connected to the tree
              without using a link (that's a hard link behaviour).
            - other object: Convert first the object with ndarray and then
              store it. ValueError if the resulting array dtype is not
              supported.
        """
        if name in self:
            # From the h5py API
            raise RuntimeError("Unable to create link (name already exists)")

        elements = name.rsplit("/", 1)
        if len(elements) == 1:
            parent = self
            basename = elements[0]
        else:
            group_path, basename = elements
            if group_path in self:
                parent = self[group_path]
            else:
                parent = self.create_group(group_path)

        if isinstance(obj, SoftLink):
            obj._set_basename(basename)
            node = obj
        elif isinstance(obj, Node):
            if obj.parent is None:
                obj._set_basename(basename)
                node = obj
            else:
                node = SoftLink(basename, obj.name)
        elif isinstance(obj, numpy.dtype):
            node = Dataset(basename, data=obj)
        elif isinstance(obj, numpy.ndarray):
            node = Dataset(basename, data=obj)
        else:
            data = numpy.array(obj)
            try:
                node = Dataset(basename, data=data)
            except TypeError as e:
                raise ValueError(e.args[0])

        parent.add_node(node)

    def __getitem__(self, name):
        """Return a child from his name.

        :param str name: name of a member or a path throug members using '/'
            separator. A '/' as a prefix access to the root item of the tree.
        :rtype: Node
        """
        if name is None or name == "":
            raise ValueError("No name")
        return self._get(name, getlink=False)

    def __contains__(self, name):
        """Returns true if name is an existing child of this group.

        :rtype: bool
        """
        if "/" not in name:
            return name in self._get_items()

        if name.startswith("/"):
            # h5py allows to access any valid full path from any group
            node = self.file
        else:
            node = self

        name = name.lstrip("/")
        basenames = name.split("/")
        for basename in basenames:
            if basename.strip() == "":
                # presence of a trailing "/" in name
                # (OK for groups, not for datasets)
                if isinstance(node, SoftLink):
                    # traverse links
                    node = node.file.get(node.path, getlink=False)
                    if node is None:
                        # broken link
                        return False
                if utils.is_dataset(node):
                    return False
                continue
            if basename not in node._get_items():
                return False
            node = node[basename]

        return True

    def __len__(self):
        """Returns the number of children contained in this group.

        :rtype: int
        """
        return len(self._get_items())

    def __iter__(self):
        """Iterate over member names"""
        for x in self._get_items().__iter__():
            yield x

    def keys(self):
        """Returns an iterator over the children's names in a group."""
        return self._get_items().keys()

    def values(self):
        """Returns an iterator over the children nodes (groups and datasets)
        in a group.

        .. versionadded:: 0.6
        """
        return self._get_items().values()

    def items(self):
        """Returns items iterator containing name-node mapping.

        :rtype: iterator
        """
        return self._get_items().items()

    def visit(self, func, visit_links=False):
        """Recursively visit all names in this group and subgroups.
        See the documentation for `h5py.Group.visit` for more help.

        :param func: Callable (function, method or callable object)
        :type func: callable
        """
        origin_name = self.name
        return self._visit(func, origin_name, visit_links)

    def visititems(self, func, visit_links=False):
        """Recursively visit names and objects in this group.
        See the documentation for `h5py.Group.visititems` for more help.

        :param func: Callable (function, method or callable object)
        :type func: callable
        :param bool visit_links: If *False*, ignore links. If *True*,
            call `func(name)` for links and recurse into target groups.
        """
        origin_name = self.name
        return self._visit(func, origin_name, visit_links,
                           visititems=True)

    def _visit(self, func, origin_name,
               visit_links=False, visititems=False):
        """Shared implementation of :meth:`visit` and :meth:`visititems`.

        :param origin_name: name of first group that initiated the recursion
            This is used to compute the relative path from each item's
            absolute path.
        """
        for member in self.values():
            ret = None
            if not isinstance(member, SoftLink) or visit_links:
                relative_name = member.name[len(origin_name):]
                # remove leading slash and unnecessary trailing slash
                relative_name = relative_name.strip("/")
                if visititems:
                    ret = func(relative_name, member)
                else:
                    ret = func(relative_name)
            # a non-None return value stops the iteration (h5py contract)
            if ret is not None:
                return ret
            if isinstance(member, Group):
                member._visit(func, origin_name, visit_links, visititems)

    def create_group(self, name):
        """Create and return a new subgroup.

        Name may be absolute or relative. Fails if the target name already
        exists.

        :param str name: Name of the new group
        """
        if not self._is_editable():
            raise RuntimeError("File is not editable")
        if name in self:
            raise ValueError("Unable to create group (name already exists)")

        if name.startswith("/"):
            name = name[1:]
            return self.file.create_group(name)

        elements = name.split('/')
        group = self
        for basename in elements:
            if basename in group:
                group = group[basename]
                if not isinstance(group, Group):
                    # BUG FIX: closing parenthesis was missing in the message
                    raise RuntimeError("Unable to create group (group parent is missing)")
            else:
                node = Group(basename)
                group.add_node(node)
                group = node
        return group

    def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
        """Create and return a sub dataset.

        :param str name: Name of the dataset.
        :param shape: Dataset shape. Use "()" for scalar datasets.
            Required if "data" isn't provided.
        :param dtype: Numpy dtype or string.
            If omitted, dtype('f') will be used.
            Required if "data" isn't provided; otherwise, overrides data
            array's dtype.
        :param numpy.ndarray data: Provide data to initialize the dataset.
            If used, you can omit shape and dtype arguments.
        :param kwds: Extra arguments. Nothing yet supported.
        """
        if not self._is_editable():
            raise RuntimeError("File is not editable")
        if len(kwds) > 0:
            raise TypeError("Extra args provided, but nothing supported")
        if "/" in name:
            raise TypeError("Path are not supported")
        if data is None:
            if dtype is None:
                dtype = numpy.float64
            data = numpy.empty(shape=shape, dtype=dtype)
        elif dtype is not None:
            data = data.astype(dtype)
        dataset = Dataset(name, data)
        self.add_node(dataset)
        return dataset


class _LinkToGroup(Group):
    """Virtual group providing link to another group"""

    def __init__(self, name, target, parent=None):
        Group.__init__(self, name, parent=parent)
        self.__target = target

    def _get_items(self):
        return self.__target._get_items()

    @property
    def attrs(self):
        return self.__target.attrs


class LazyLoadableGroup(Group):
    """Abstract group which provides a lazy loading of the child.

    The class has to be inherited and the :meth:`_create_child` method has
    to be implemented to add (:meth:`_add_node`) all children. This factory
    is only called once, when children are needed.
    """

    def __init__(self, name, parent=None, attrs=None):
        Group.__init__(self, name, parent, attrs)
        self.__is_initialized = False

    def _get_items(self):
        """Returns the internal structure which contains the children.

        It overwrite method :meth:`_get_items` to implement the lazy
        loading feature.

        :rtype: dict
        """
        if not self.__is_initialized:
            self.__is_initialized = True
            self._create_child()
        return Group._get_items(self)

    def _create_child(self):
        """
        Factory to create the child contained by the group when it is needed.

        It has to be implemented to work.
        """
        raise NotImplementedError()
class File(Group):
    """Root node of a commonh5 tree, mimicking :class:`h5py.File`.

    It is the special :class:`Group` sitting at the top of the tree
    structure.
    """

    def __init__(self, name=None, mode=None, attrs=None):
        """
        :param str name: File name, if the file exists
        :param str mode: Access mode
            - "r": Read-only (default). :meth:`create_dataset` and
              :meth:`create_group` are locked.
            - "w": Editable. :meth:`create_dataset` and
              :meth:`create_group` are available.
        :param dict attrs: Default attributes
        """
        Group.__init__(self, name="", parent=None, attrs=attrs)
        self._file_name = name
        mode = "r" if mode is None else mode
        assert mode in ["r", "w"]
        self._mode = mode

    @property
    def filename(self):
        """Name of the file, as given to the constructor."""
        return self._file_name

    @property
    def mode(self):
        """Access mode: "r" or "w"."""
        return self._mode

    @property
    def h5_class(self):
        """Returns the :class:`h5py.File` class"""
        return utils.H5Type.FILE

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the object, and free up associated resources.

        No-op here; subclasses override as needed.
        """
        pass
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +""" +This module handles read and write operations to INI files, with data type +preservation and support for nesting subsections to any depth. + +Data to be written to INI must be stored in a dictionary with string keys. +Data cannot be stored at the root level of the dictionary, it must be inside +a sub-dictionary. This means that in the INI file, all parameters must be +in a section, and if you need a `default` section you must define it +explicitly. 
+ +Usage example: +============== + +Write a dictionary to an INI file:: + + from silx.io.configdict import ConfigDict + + ddict = { + 'simple_types': { + 'float': 1.0, + 'int': 1, + 'string': 'Hello World', + }, + 'containers': { + 'list': [-1, 'string', 3.0, False], + 'array': numpy.array([1.0, 2.0, 3.0]), + 'dict': { + 'key1': 'Hello World', + 'key2': 2.0, + } + } + } + + ConfigDict(initdict=ddict).write("foo.ini") + + +Read an INI file into a dictionary like structure:: + + from silx.io.configdict import ConfigDict + + confdict = ConfigDict() + confdict.read("foo.ini") + + print("Available sections in INI file:") + print(confdict.keys()) + + for key in confdict: + for subkey in confdict[key]: + print("Section %s, parameter %s:" % (key, subkey)) + print(confdict[key][subkey]) + + +Classes: +======== + +- :class:`ConfigDict` +- :class:`OptionStr` + +""" + + +__author__ = ["E. Papillon", "V.A. Sole", "P. Knobel"] +__license__ = "MIT" +__date__ = "15/09/2016" + +from collections import OrderedDict +import numpy +import re +import sys +if sys.version_info < (3, ): + import ConfigParser as configparser +else: + import configparser + + +string_types = (basestring,) if sys.version_info[0] == 2 else (str,) # noqa + + +def _boolean(sstr): + """Coerce a string to a boolean following the same convention as + :meth:`configparser.ConfigParser.getboolean`: + - '1', 'yes', 'true' and 'on' cause this function to return ``True`` + - '0', 'no', 'false' and 'off' cause this function to return ``False`` + + :param sstr: String representation of a boolean + :return: ``True`` or ``False`` + :raise: ``ValueError`` if ``sstr`` is not a valid string representation + of a boolean + """ + if sstr.lower() in ['1', 'yes', 'true', 'on']: + return True + if sstr.lower() in ['0', 'no', 'false', 'off']: + return False + msg = "Cannot coerce string '%s' to a boolean value. 
" % sstr + msg += "Valid boolean strings: '1', 'yes', 'true', 'on', " + msg += "'0', 'no', 'false', 'off'" + raise ValueError(msg) + + +def _parse_simple_types(sstr): + """Coerce a string representation of a value to the most appropriate data + type, by trial and error. + + Typecasting is attempted to following data types (in this order): + `int`, `float`, `boolean`. If all of these conversions fail, ``sstr`` + is assumed to be a string. + + :param sstr: String representation of an unknown data type + :return: Value coerced into the most appropriate data type + """ + try: + return int(sstr) + except ValueError: + try: + return float(sstr) + except ValueError: + try: + return _boolean(sstr) + except ValueError: + if sstr.strip() == "None": + return None + # un-escape string + sstr = sstr.lstrip("\\") + # un-escape commas + sstr = sstr.replace(r"\,", ",").replace("^@", ",") + return sstr + + +def _parse_container(sstr): + """Parse a string representation of a list or a numpy array. + + A string such as ``"-1, Hello World, 3.0"`` is interpreted as the list + ``[-1, "Hello World", 3.0]``. ``"-1, "no", 3.0\n\t1, 2"`` is interpreted + a list of 2 lists ``[[-1, False, 3.0], [1, 2]]`` + + Strings such as ``"[ [ 1. 2. 3.] [ 4. 5. 6.] ]"`` or + ``[ 1.0 2.0 3.0 ]`` are interpreted as numpy arrays. Only 1D and 2D + arrays are permitted. 
+ + :param sstr: String representation of an container type + :return: List or array + :raise: ``ValueError`` if string is not a list or an array + """ + sstr = sstr.strip() + + if not sstr: + raise ValueError + + if sstr.find(',') == -1: + # it is not a list + if (sstr[0] == '[') and (sstr[-1] == ']'): + # this looks like an array + try: + # try parsing as a 1D array + return numpy.array([float(x) for x in sstr[1:-1].split()]) + except ValueError: + # try parsing as a 2D array + if (sstr[2] == '[') and (sstr[-3] == ']'): + nrows = len(sstr[3:-3].split('] [')) + data = sstr[3:-3].replace('] [', ' ') + data = numpy.array([float(x) for x in + data.split()]) + data.shape = nrows, -1 + return data + # not a list and not an array + raise ValueError + else: + # if all commas are escaped, it is a strinq, not a list + if sstr.count(",") == sstr.count(r"\,"): + raise ValueError + + dataline = [line for line in sstr.splitlines()] + if len(dataline) == 1: + return _parse_list_line(dataline[0]) + else: + return [_parse_list_line(line) for line in dataline] + + +def _parse_list_line(sstr): + """Parse the string representation of a simple 1D list: + + ``"12, 13.1, True, Hello"`` ``->`` ``[12, 13.1, True, "Hello"]`` + + :param sstr: String + :return: List + """ + sstr = sstr.strip() + + # preserve escaped commas in strings before splitting list + # (_parse_simple_types recognizes ^@ as a comma) + sstr.replace(r"\,", "^@") + # it is a list + if sstr.endswith(','): + if ',' in sstr[:-1]: + return [_parse_simple_types(sstr2.strip()) + for sstr2 in sstr[:-1].split(',')] + else: + return [_parse_simple_types(sstr[:-1].strip())] + else: + return [_parse_simple_types(sstr2.strip()) + for sstr2 in sstr.split(',')] + + +class OptionStr(str): + """String class providing typecasting methods to parse values in a + :class:`ConfigDict` generated configuration file. 
+ """ + def toint(self): + """ + :return: integer + :raise: ``ValueError`` if conversion to ``int`` failed + """ + return int(self) + + def tofloat(self): + """ + :return: Floating point value + :raise: ``ValueError`` if conversion to ``float`` failed + """ + return float(self) + + def toboolean(self): + """ + '1', 'yes', 'true' and 'on' are interpreted as ``True`` + + '0', 'no', 'false' and 'off' are interpreted as ``False`` + + :return: Boolean + :raise: ``ValueError`` if conversion to ``bool`` failed + """ + return _boolean(self) + + def tostr(self): + """Return string after replacing escaped commas ``\\,`` with regular + commas ``,`` and removing leading backslash. + + :return: str(self) + """ + return str(self.replace(r"\,", ",").lstrip("\\")) + + def tocontainer(self): + """Return a list or a numpy array. + + Any string containing a comma (``,``) character will be interpreted + as a list: for instance ``-1, Hello World, 3.0``, or ``2.0,`` + + The format for numpy arrays is a blank space delimited list of values + between square brackets: ``[ 1.3 2.2 3.1 ]``, or + ``[ [ 1 2 3 ] [ 1 4 9 ] ]``""" + return _parse_container(self) + + def tobestguess(self): + """Parse string without prior knowledge of type. + + Conversion to following types is attempted, in this order: + `list`, `numpy array`, `int`, `float`, `boolean`. + If all of these conversions fail, the string is returned after + removing escape characters. + """ + try: + return _parse_container(self) + except ValueError: + return _parse_simple_types(self) + + +class ConfigDict(OrderedDict): + """Store configuration parameters as an ordered dictionary. + + Parameters can be grouped into sections, by storing them as + sub-dictionaries. + + Keys must be strings. Values can be: integers, booleans, lists, + numpy arrays, floats, strings. + + Methods are provided to write a configuration file in a variant of INI + format. A :class:`ConfigDict` can load (or be initialized from) a list of files. 

    The main differences between files written/read by this class and standard
    ``ConfigParser`` files are:

    - sections can be nested to any depth
    - value types are guessed when the file is read back
    - to prevent strings from being interpreted as lists, commas are
      escaped with a backslash (``\\,``)
    - strings may be prefixed with a leading backslash (``\\``) to prevent
      conversion to numeric or boolean values

    :param defaultdict: Default dictionary used to initialize the
        :class:`ConfigDict` instance and reinitialize it in case
        :meth:`reset` is called
    :param initdict: Additional initialisation dictionary, added into dict
        after initialisation with ``defaultdict``
    :param filelist: List of configuration files to be read and added into
        dict after ``defaultdict`` and ``initdict``
    """
    def __init__(self, defaultdict=None, initdict=None, filelist=None):
        # Keep a reference (not a copy) to the default dict: reset()
        # re-applies it, so mutations of ``defaultdict`` made by the caller
        # after construction will be visible after a reset.
        self.default = defaultdict if defaultdict is not None else OrderedDict()
        OrderedDict.__init__(self, self.default)
        # list of [file name, sections] pairs, appended to by read()
        self.filelist = []

        if initdict is not None:
            self.update(initdict)
        if filelist is not None:
            self.read(filelist)

    def reset(self):
        """Revert to the default values passed to the constructor as
        ``defaultdict``. Any content read or updated since then is lost.
        """
        self.clear()
        self.update(self.default)

    def clear(self):
        """Clear the dictionary and forget the list of files read so far."""
        OrderedDict.clear(self)
        self.filelist = []

    def __tolist(self, mylist):
        """If ``mylist`` is not a list, encapsulate it in a list and return
        it.
+ + :param mylist: List to encapsulate + :returns: ``mylist`` if it is a list, ``[mylist]`` if it isn't + """ + if mylist is None: + return None + if not isinstance(mylist, list): + return [mylist] + else: + return mylist + + def getfiles(self): + """Return list of configuration file names""" + return self.filelist + + def getlastfile(self): + """Return last configuration file name""" + return self.filelist[len(self.filelist) - 1] + + def __convert(self, option): + """Used as ``configparser.ConfigParser().optionxform`` to transform + option names on every read, get, or set operation. + + This overrides the default :mod:`ConfigParser` behavior, in order to + preserve case rather converting names to lowercase. + + :param option: Option name (any string) + :return: ``option`` unchanged + """ + return option + + def read(self, filelist, sections=None): + """ + Read all specified configuration files into the internal dictionary. + + :param filelist: List of names of files to be added into the internal + dictionary + :param sections: If not ``None``, add only the content of the + specified sections + :type sections: List + """ + filelist = self.__tolist(filelist) + sections = self.__tolist(sections) + cfg = configparser.ConfigParser() + cfg.optionxform = self.__convert + cfg.read(filelist) + self.__read(cfg, sections) + + for ffile in filelist: + self.filelist.append([ffile, sections]) + + def __read(self, cfg, sections=None): + """Read a :class:`configparser.ConfigParser` instance into the + internal dictionary. 
+ + :param cfg: Instance of :class:`configparser.ConfigParser` + :param sections: If not ``None``, add only the content of the + specified sections into the internal dictionary + """ + cfgsect = cfg.sections() + + if sections is None: + readsect = cfgsect + else: + readsect = [sect for sect in cfgsect if sect in sections] + + for sect in readsect: + ddict = self + for subsectw in sect.split('.'): + subsect = subsectw.replace("_|_", ".") + if not subsect in ddict: + ddict[subsect] = OrderedDict() + ddict = ddict[subsect] + for opt in cfg.options(sect): + ddict[opt] = self.__parse_data(cfg.get(sect, opt)) + + def __parse_data(self, data): + """Parse an option returned by ``ConfigParser``. + + :param data: Option string to be parsed + + The original option is a string, we try to parse it as one of + following types: `numpx array`, `list`, `float`, `int`, `boolean`, + `string` + """ + return OptionStr(data).tobestguess() + + def tostring(self): + """Return INI file content generated by :meth:`write` as a string + """ + import StringIO + tmp = StringIO.StringIO() + self.__write(tmp, self) + return tmp.getvalue() + + def write(self, ffile): + """Write the current dictionary to the given filename or + file handle. + + :param ffile: Output file name or file handle. If a file name is + provided, the method opens it, writes it and closes it again. + """ + if not hasattr(ffile, "write"): + fp = open(ffile, "w") + else: + fp = ffile + + self.__write(fp, self) + + if not hasattr(ffile, "write"): + fp.close() + + def _escape_str(self, sstr): + """Escape strings and special characters in strings with a ``\\`` + character to ensure they are read back as strings and not parsed. + + :param sstr: String to be escaped + :returns sstr: String with escape characters (if needed) + + This way, we ensure these strings cannot be interpreted as a numeric + or boolean types and commas in strings are not interpreted as list + items separators. 
We also escape ``%`` when it is not followed by a + ``(``, as required by :mod:`configparser` because ``%`` is used in + the interpolation syntax + (https://docs.python.org/3/library/configparser.html#interpolation-of-values). + """ + non_str = r'^([0-9]+|[0-9]*\.[0-9]*|none|false|true|on|off|yes|no)$' + if re.match(non_str, sstr.lower()): + sstr = "\\" + sstr + # Escape commas + sstr = sstr.replace(",", r"\,") + + if sys.version_info >= (3, ): + # Escape % characters except in "%%" and "%(" + sstr = re.sub(r'%([^%\(])', r'%%\1', sstr) + + return sstr + + def __write(self, fp, ddict, secthead=None): + """Do the actual file writing when called by the ``write`` method. + + :param fp: File handle + :param ddict: Dictionary to be written to file + :param secthead: Prefix for section name, used for handling nested + dictionaries recursively. + """ + dictkey = [] + + for key in ddict.keys(): + if hasattr(ddict[key], 'keys'): + # subsections are added at the end of a section + dictkey.append(key) + elif isinstance(ddict[key], list): + fp.write('%s = ' % key) + llist = [] + sep = ', ' + for item in ddict[key]: + if isinstance(item, list): + if len(item) == 1: + if isinstance(item[0], string_types): + self._escape_str(item[0]) + llist.append('%s,' % self._escape_str(item[0])) + else: + llist.append('%s,' % item[0]) + else: + item2 = [] + for val in item: + if isinstance(val, string_types): + val = self._escape_str(val) + item2.append(val) + llist.append(', '.join([str(val) for val in item2])) + sep = '\n\t' + elif isinstance(item, string_types): + llist.append(self._escape_str(item)) + else: + llist.append(str(item)) + fp.write('%s\n' % (sep.join(llist))) + elif isinstance(ddict[key], string_types): + fp.write('%s = %s\n' % (key, self._escape_str(ddict[key]))) + else: + if isinstance(ddict[key], numpy.ndarray): + fp.write('%s =' % key + ' [ ' + + ' '.join([str(val) for val in ddict[key]]) + + ' ]\n') + else: + fp.write('%s = %s\n' % (key, ddict[key])) + + for key in 
dictkey: + if secthead is None: + newsecthead = key.replace(".", "_|_") + else: + newsecthead = '%s.%s' % (secthead, key.replace(".", "_|_")) + + fp.write('\n[%s]\n' % newsecthead) + self.__write(fp, ddict[key], newsecthead) diff --git a/src/silx/io/convert.py b/src/silx/io/convert.py new file mode 100644 index 0000000..ba9a254 --- /dev/null +++ b/src/silx/io/convert.py @@ -0,0 +1,335 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""This module provides classes and function to convert file formats supported +by *silx* into HDF5 file. Currently, SPEC file and fabio images are the +supported formats. 
+ +Read the documentation of :mod:`silx.io.spech5`, :mod:`silx.io.fioh5` and :mod:`silx.io.fabioh5` for +information on the structure of the output HDF5 files. + +Text strings are written to the HDF5 datasets as variable-length utf-8. + +.. warning:: + + The output format for text strings changed in silx version 0.7.0. + Prior to that, text was output as fixed-length ASCII. + + To be on the safe side, when reading back a HDF5 file written with an + older version of silx, you can test for the presence of a *decode* + attribute. To ensure that you always work with unicode text:: + + >>> import h5py + >>> h5f = h5py.File("my_scans.h5", "r") + >>> title = h5f["/68.1/title"] + >>> if hasattr(title, "decode"): + ... title = title.decode() + + +.. note:: This module has a dependency on the `h5py <http://www.h5py.org/>`_ + library, which is not a mandatory dependency for `silx`. You might need + to install it if you don't already have it. +""" + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "17/07/2018" + + +import logging + +import h5py +import numpy + +import silx.io +from .utils import is_dataset, is_group, is_softlink, visitall +from . import fabioh5 + + +_logger = logging.getLogger(__name__) + + +def _create_link(h5f, link_name, target_name, + link_type="soft", overwrite_data=False): + """Create a link in a HDF5 file + + If member with name ``link_name`` already exists, delete it first or + ignore link depending on global param ``overwrite_data``. + + :param h5f: :class:`h5py.File` object + :param link_name: Link path + :param target_name: Handle for target group or dataset + :param str link_type: "soft" or "hard" + :param bool overwrite_data: If True, delete existing member (group, + dataset or link) with the same name. Default is False. 
+ """ + if link_name not in h5f: + _logger.debug("Creating link " + link_name + " -> " + target_name) + elif overwrite_data: + _logger.warning("Overwriting " + link_name + " with link to " + + target_name) + del h5f[link_name] + else: + _logger.warning(link_name + " already exist. Cannot create link to " + + target_name) + return None + + if link_type == "hard": + h5f[link_name] = h5f[target_name] + elif link_type == "soft": + h5f[link_name] = h5py.SoftLink(target_name) + else: + raise ValueError("link_type must be 'hard' or 'soft'") + + +def _attr_utf8(attr_value): + """If attr_value is bytes, make sure we output utf-8 + + :param attr_value: String (possibly bytes if PY2) + :return: Attr ready to be written by h5py as utf8 + """ + if isinstance(attr_value, (bytes, str)): + out_attr_value = numpy.array( + attr_value, + dtype=h5py.special_dtype(vlen=str)) + else: + out_attr_value = attr_value + + return out_attr_value + + +class Hdf5Writer(object): + """Converter class to write the content of a data file to a HDF5 file. + """ + def __init__(self, + h5path='/', + overwrite_data=False, + link_type="soft", + create_dataset_args=None, + min_size=500): + """ + + :param h5path: Target path where the scan groups will be written + in the output HDF5 file. + :param bool overwrite_data: + See documentation of :func:`write_to_h5` + :param str link_type: ``"hard"`` or ``"soft"`` (default) + :param dict create_dataset_args: Dictionary of args you want to pass to + ``h5py.File.create_dataset``. 
+ See documentation of :func:`write_to_h5` + :param int min_size: + See documentation of :func:`write_to_h5` + """ + self.h5path = h5path + if not h5path.startswith("/"): + # target path must be absolute + self.h5path = "/" + h5path + if not self.h5path.endswith("/"): + self.h5path += "/" + + self._h5f = None + """h5py.File object, assigned in :meth:`write`""" + + if create_dataset_args is None: + create_dataset_args = {} + self.create_dataset_args = create_dataset_args + + self.min_size = min_size + + self.overwrite_data = overwrite_data # boolean + + self.link_type = link_type + """'soft' or 'hard' """ + + self._links = [] + """List of *(link_path, target_path)* tuples.""" + + def write(self, infile, h5f): + """Copy `infile` content to `h5f` file under `h5path`. + + All the parameters needed for the conversion have been initialized + in the constructor. + + External links in `infile` are ignored. + + :param Union[commonh5.Group,h5py.Group] infile: + File/Class from which to read the content to copy from. 
+ :param h5py.File h5f: File where to write the copied content to + """ + # Recurse through all groups and datasets to add them to the HDF5 + self._h5f = h5f + for name, item in visitall(infile): + self.append_member_to_h5(name, item) + + # Handle the attributes of the root group + root_grp = h5f[self.h5path] + for key in infile.attrs: + if self.overwrite_data or key not in root_grp.attrs: + root_grp.attrs.create(key, + _attr_utf8(infile.attrs[key])) + + # Handle links at the end, when their targets are created + for link_name, target_name in self._links: + _create_link(self._h5f, link_name, target_name, + link_type=self.link_type, + overwrite_data=self.overwrite_data) + self._links = [] + + def append_member_to_h5(self, h5like_name, obj): + """Add one group or one dataset to :attr:`h5f`""" + h5_name = self.h5path + h5like_name.lstrip("/") + if is_softlink(obj): + # links to be created after all groups and datasets + h5_target = self.h5path + obj.path.lstrip("/") + self._links.append((h5_name, h5_target)) + + elif is_dataset(obj): + _logger.debug("Saving dataset: " + h5_name) + + member_initially_exists = h5_name in self._h5f + + if self.overwrite_data and member_initially_exists: + _logger.warning("Overwriting dataset: " + h5_name) + del self._h5f[h5_name] + + if self.overwrite_data or not member_initially_exists: + if isinstance(obj, fabioh5.FrameData) and len(obj.shape) > 2: + # special case of multiframe data + # write frame by frame to save memory usage low + ds = self._h5f.create_dataset(h5_name, + shape=obj.shape, + dtype=obj.dtype, + **self.create_dataset_args) + for i, frame in enumerate(obj): + ds[i] = frame + else: + # fancy arguments don't apply to small dataset + if obj.size < self.min_size: + ds = self._h5f.create_dataset(h5_name, data=obj[()]) + else: + ds = self._h5f.create_dataset(h5_name, data=obj[()], + **self.create_dataset_args) + else: + ds = self._h5f[h5_name] + + # add HDF5 attributes + for key in obj.attrs: + if self.overwrite_data or key 
not in ds.attrs: + ds.attrs.create(key, + _attr_utf8(obj.attrs[key])) + + if not self.overwrite_data and member_initially_exists: + _logger.warning("Not overwriting existing dataset: " + h5_name) + + elif is_group(obj): + if h5_name not in self._h5f: + _logger.debug("Creating group: " + h5_name) + grp = self._h5f.create_group(h5_name) + else: + grp = self._h5f[h5_name] + + # add HDF5 attributes + for key in obj.attrs: + if self.overwrite_data or key not in grp.attrs: + grp.attrs.create(key, + _attr_utf8(obj.attrs[key])) + else: + _logger.warning("Unsuppored entity, ignoring: %s", h5_name) + + +def write_to_h5(infile, h5file, h5path='/', mode="a", + overwrite_data=False, link_type="soft", + create_dataset_args=None, min_size=500): + """Write content of a h5py-like object into a HDF5 file. + + Warning: External links in `infile` are ignored. + + :param infile: Path of input file, :class:`commonh5.File`, + :class:`commonh5.Group`, :class:`h5py.File` or :class:`h5py.Group` + :param h5file: Path of output HDF5 file or HDF5 file handle + (`h5py.File` object) + :param str h5path: Target path in HDF5 file in which scan groups are created. + Default is root (``"/"``) + :param str mode: Can be ``"r+"`` (read/write, file must exist), + ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail + if exists) or ``"a"`` (read/write if exists, create otherwise). + This parameter is ignored if ``h5file`` is a file handle. + :param bool overwrite_data: If ``True``, existing groups and datasets can be + overwritten, if ``False`` they are skipped. This parameter is only + relevant if ``file_mode`` is ``"r+"`` or ``"a"``. + :param str link_type: *"soft"* (default) or *"hard"* + :param dict create_dataset_args: Dictionary of args you want to pass to + ``h5py.File.create_dataset``. This allows you to specify filters and + compression parameters. Don't specify ``name`` and ``data``. + These arguments are only applied to datasets larger than 1MB. 
+ :param int min_size: Minimum number of elements in a dataset to apply + chunking and compression. Default is 500. + + The structure of the spec data in an HDF5 file is described in the + documentation of :mod:`silx.io.spech5`. + """ + writer = Hdf5Writer(h5path=h5path, + overwrite_data=overwrite_data, + link_type=link_type, + create_dataset_args=create_dataset_args, + min_size=min_size) + + # both infile and h5file can be either file handle or a file name: 4 cases + if not isinstance(h5file, h5py.File) and not is_group(infile): + with silx.io.open(infile) as h5pylike: + with h5py.File(h5file, mode) as h5f: + writer.write(h5pylike, h5f) + elif isinstance(h5file, h5py.File) and not is_group(infile): + with silx.io.open(infile) as h5pylike: + writer.write(h5pylike, h5file) + elif is_group(infile) and not isinstance(h5file, h5py.File): + with h5py.File(h5file, mode) as h5f: + writer.write(infile, h5f) + else: + writer.write(infile, h5file) + + +def convert(infile, h5file, mode="w-", create_dataset_args=None): + """Convert a supported file into an HDF5 file, write scans into the + root group (``/``). + + This is a convenience shortcut to call:: + + write_to_h5(h5like, h5file, h5path='/', + mode="w-", link_type="soft") + + :param infile: Path of input file or :class:`commonh5.File` object + or :class:`commonh5.Group` object + :param h5file: Path of output HDF5 file, or h5py.File object + :param mode: Can be ``"w"`` (write, existing file is + lost), ``"w-"`` (write, fail if exists). This is ignored + if ``h5file`` is a file handle. + :param create_dataset_args: Dictionary of args you want to pass to + ``h5py.File.create_dataset``. This allows you to specify filters and + compression parameters. Don't specify ``name`` and ``data``. + """ + if mode not in ["w", "w-"]: + raise IOError("File mode must be 'w' or 'w-'. 
Use write_to_h5" + + " to append data to an existing HDF5 file.") + write_to_h5(infile, h5file, h5path='/', mode=mode, + create_dataset_args=create_dataset_args) diff --git a/src/silx/io/dictdump.py b/src/silx/io/dictdump.py new file mode 100644 index 0000000..a24de42 --- /dev/null +++ b/src/silx/io/dictdump.py @@ -0,0 +1,843 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2020 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ +"""This module offers a set of functions to dump a python dictionary indexed +by text strings to following file formats: `HDF5, INI, JSON` +""" + +from collections import OrderedDict +from collections.abc import Mapping +import json +import logging +import numpy +import os.path +import sys +import h5py + +from .configdict import ConfigDict +from .utils import is_group +from .utils import is_dataset +from .utils import is_link +from .utils import is_softlink +from .utils import is_externallink +from .utils import is_file as is_h5_file_like +from .utils import open as h5open +from .utils import h5py_read_dataset +from .utils import H5pyAttributesReadWrapper +from silx.utils.deprecation import deprecated_warning + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "17/07/2018" + +logger = logging.getLogger(__name__) + +vlen_utf8 = h5py.special_dtype(vlen=str) +vlen_bytes = h5py.special_dtype(vlen=bytes) + +UPDATE_MODE_VALID_EXISTING_VALUES = ("add", "replace", "modify") + + +def _prepare_hdf5_write_value(array_like): + """Cast a python object into a numpy array in a HDF5 friendly format. + + :param array_like: Input dataset in a type that can be digested by + ``numpy.array()`` (`str`, `list`, `numpy.ndarray`…) + :return: ``numpy.ndarray`` ready to be written as an HDF5 dataset + """ + array = numpy.asarray(array_like) + if numpy.issubdtype(array.dtype, numpy.bytes_): + return numpy.array(array_like, dtype=vlen_bytes) + elif numpy.issubdtype(array.dtype, numpy.str_): + return numpy.array(array_like, dtype=vlen_utf8) + else: + return array + + +class _SafeH5FileWrite: + """Context manager returning a :class:`h5py.File` object. + + If this object is initialized with a file path, we open the file + and then we close it on exiting. 
+ + If a :class:`h5py.File` instance is provided to :meth:`__init__` rather + than a path, we assume that the user is responsible for closing the + file. + + This behavior is well suited for handling h5py file in a recursive + function. The object is created in the initial call if a path is provided, + and it is closed only at the end when all the processing is finished. + """ + def __init__(self, h5file, mode="w"): + """ + :param h5file: HDF5 file path or :class:`h5py.File` instance + :param str mode: Can be ``"r+"`` (read/write, file must exist), + ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if + exists) or ``"a"`` (read/write if exists, create otherwise). + This parameter is ignored if ``h5file`` is a file handle. + """ + self.raw_h5file = h5file + self.mode = mode + + def __enter__(self): + if not isinstance(self.raw_h5file, h5py.File): + self.h5file = h5py.File(self.raw_h5file, self.mode) + self.close_when_finished = True + else: + self.h5file = self.raw_h5file + self.close_when_finished = False + return self.h5file + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.close_when_finished: + self.h5file.close() + + +class _SafeH5FileRead: + """Context manager returning a :class:`h5py.File` or a + :class:`silx.io.spech5.SpecH5` or a :class:`silx.io.fabioh5.File` object. + + The general behavior is the same as :class:`_SafeH5FileWrite` except + that SPEC files and all formats supported by fabio can also be opened, + but in read-only mode. 
+ """ + def __init__(self, h5file): + """ + + :param h5file: HDF5 file path or h5py.File-like object + """ + self.raw_h5file = h5file + + def __enter__(self): + if not is_h5_file_like(self.raw_h5file): + self.h5file = h5open(self.raw_h5file) + self.close_when_finished = True + else: + self.h5file = self.raw_h5file + self.close_when_finished = False + + return self.h5file + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.close_when_finished: + self.h5file.close() + + +def _normalize_h5_path(h5root, h5path): + """ + :param h5root: File name or h5py-like File, Group or Dataset + :param str h5path: relative to ``h5root`` + :returns 2-tuple: (File or file object, h5path) + """ + if is_group(h5root): + group_name = h5root.name + if group_name == "/": + pass + elif h5path: + h5path = group_name + "/" + h5path + else: + h5path = group_name + h5file = h5root.file + elif is_dataset(h5root): + h5path = h5root.name + h5file = h5root.file + else: + h5file = h5root + if not h5path: + h5path = "/" + elif not h5path.endswith("/"): + h5path += "/" + return h5file, h5path + + +def dicttoh5(treedict, h5file, h5path='/', + mode="w", overwrite_data=None, + create_dataset_args=None, update_mode=None): + """Write a nested dictionary to a HDF5 file, using keys as member names. + + If a dictionary value is a sub-dictionary, a group is created. If it is + any other data type, it is cast into a numpy array and written as a + :mod:`h5py` dataset. Dictionary keys must be strings and cannot contain + the ``/`` character. + + If dictionary keys are tuples they are interpreted to set h5 attributes. + The tuples should have the format (dataset_name, attr_name). + + Existing HDF5 items can be deleted by providing the dictionary value + ``None``, provided that ``update_mode in ["modify", "replace"]``. + + .. note:: + + This function requires `h5py <http://www.h5py.org/>`_ to be installed. 
+
+    :param treedict: Nested dictionary/tree structure with strings or tuples as
+        keys and array-like objects as leafs. The ``"/"`` character can be used
+        to define sub trees. If tuples are used as keys they should have the
+        format (dataset_name,attr_name) and will add an HDF5 attribute with the
+        corresponding value.
+    :param h5file: File name or h5py-like File, Group or Dataset
+    :param h5path: Target path in the HDF5 file relative to ``h5file``.
+        Default is root (``"/"``)
+    :param mode: Can be ``"r+"`` (read/write, file must exist),
+        ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
+        exists) or ``"a"`` (read/write if exists, create otherwise).
+        This parameter is ignored if ``h5file`` is a file handle.
+    :param overwrite_data: Deprecated. ``True`` is approximately equivalent
+        to ``update_mode="modify"`` and ``False`` is equivalent to
+        ``update_mode="add"``.
+    :param create_dataset_args: Dictionary of args you want to pass to
+        ``h5f.create_dataset``. This allows you to specify filters and
+        compression parameters. Don't specify ``name`` and ``data``.
+    :param update_mode: Can be ``add`` (default), ``modify`` or ``replace``.
+
+        * ``add``: Extend the existing HDF5 tree when possible. Existing HDF5
+          items (groups, datasets and attributes) remain untouched.
+        * ``modify``: Extend the existing HDF5 tree when possible, modify
+          existing attributes, modify same-sized dataset values and delete
+          HDF5 items with a ``None`` value in the dict tree.
+        * ``replace``: Replace the existing HDF5 tree. Items from the root of
+          the HDF5 tree that are not present in the root of the dict tree
+          will remain untouched.
+ + Example:: + + from silx.io.dictdump import dicttoh5 + + city_area = { + "Europe": { + "France": { + "Isère": { + "Grenoble": 18.44, + ("Grenoble","unit"): "km2" + }, + "Nord": { + "Tourcoing": 15.19, + ("Tourcoing","unit"): "km2" + }, + }, + }, + } + + create_ds_args = {'compression': "gzip", + 'shuffle': True, + 'fletcher32': True} + + dicttoh5(city_area, "cities.h5", h5path="/area", + create_dataset_args=create_ds_args) + """ + + if overwrite_data is not None: + reason = ( + "`overwrite_data=True` becomes `update_mode='modify'` and " + "`overwrite_data=False` becomes `update_mode='add'`" + ) + deprecated_warning( + type_="argument", + name="overwrite_data", + reason=reason, + replacement="update_mode", + since_version="0.15", + ) + + if update_mode is None: + if overwrite_data: + update_mode = "modify" + else: + update_mode = "add" + else: + if update_mode not in UPDATE_MODE_VALID_EXISTING_VALUES: + raise ValueError(( + "Argument 'update_mode' can only have values: {}" + "".format(UPDATE_MODE_VALID_EXISTING_VALUES) + )) + if overwrite_data is not None: + logger.warning("The argument `overwrite_data` is ignored") + + if not isinstance(treedict, Mapping): + raise TypeError("'treedict' must be a dictionary") + + h5file, h5path = _normalize_h5_path(h5file, h5path) + + def _iter_treedict(attributes=False): + nonlocal treedict + for key, value in treedict.items(): + if isinstance(key, tuple) == attributes: + yield key, value + + change_allowed = update_mode in ("replace", "modify") + + with _SafeH5FileWrite(h5file, mode=mode) as h5f: + # Create the root of the tree + if h5path in h5f: + if not is_group(h5f[h5path]): + if update_mode == "replace": + del h5f[h5path] + h5f.create_group(h5path) + else: + return + else: + h5f.create_group(h5path) + + # Loop over all groups, links and datasets + for key, value in _iter_treedict(attributes=False): + h5name = h5path + str(key) + exists = h5name in h5f + + if value is None: + # Delete HDF5 item + if exists and 
change_allowed: + del h5f[h5name] + exists = False + elif isinstance(value, Mapping): + # HDF5 group + if exists and update_mode == "replace": + del h5f[h5name] + exists = False + if value: + dicttoh5(value, h5f, h5name, + update_mode=update_mode, + create_dataset_args=create_dataset_args) + elif not exists: + h5f.create_group(h5name) + elif is_link(value): + # HDF5 link + if exists and update_mode == "replace": + del h5f[h5name] + exists = False + if not exists: + # Create link from h5py link object + h5f[h5name] = value + else: + # HDF5 dataset + if exists and not change_allowed: + continue + data = _prepare_hdf5_write_value(value) + + # Edit the existing dataset + attrs_backup = None + if exists: + try: + h5f[h5name][()] = data + continue + except Exception: + # Delete the existing dataset + if update_mode != "replace": + if not is_dataset(h5f[h5name]): + continue + attrs_backup = dict(h5f[h5name].attrs) + del h5f[h5name] + + # Create dataset + # can't apply filters on scalars (datasets with shape == ()) + if data.shape == () or create_dataset_args is None: + h5f.create_dataset(h5name, + data=data) + else: + h5f.create_dataset(h5name, + data=data, + **create_dataset_args) + if attrs_backup: + h5f[h5name].attrs.update(attrs_backup) + + # Loop over all attributes + for key, value in _iter_treedict(attributes=True): + if len(key) != 2: + raise ValueError("HDF5 attribute must be described by 2 values") + h5name = h5path + key[0] + attr_name = key[1] + + if h5name not in h5f: + # Create an empty group to store the attribute + h5f.create_group(h5name) + + h5a = h5f[h5name].attrs + exists = attr_name in h5a + + if value is None: + # Delete HDF5 attribute + if exists and change_allowed: + del h5a[attr_name] + exists = False + else: + # Add/modify HDF5 attribute + if exists and not change_allowed: + continue + data = _prepare_hdf5_write_value(value) + h5a[attr_name] = data + + +def _has_nx_class(treedict, key=""): + return key + "@NX_class" in treedict or \ + (key, 
"NX_class") in treedict + + +def _ensure_nx_class(treedict, parents=tuple()): + """Each group needs an "NX_class" attribute. + """ + if _has_nx_class(treedict): + return + nparents = len(parents) + if nparents == 0: + treedict[("", "NX_class")] = "NXroot" + elif nparents == 1: + treedict[("", "NX_class")] = "NXentry" + else: + treedict[("", "NX_class")] = "NXcollection" + + +def nexus_to_h5_dict( + treedict, parents=tuple(), add_nx_class=True, has_nx_class=False +): + """The following conversions are applied: + * key with "{name}@{attr_name}" notation: key converted to 2-tuple + * key with ">{url}" notation: strip ">" and convert value to + h5py.SoftLink or h5py.ExternalLink + + :param treedict: Nested dictionary/tree structure with strings as keys + and array-like objects as leafs. The ``"/"`` character can be used + to define sub tree. The ``"@"`` character is used to write attributes. + The ``">"`` prefix is used to define links. + :param parents: Needed to resolve up-links (tuple of HDF5 group names) + :param add_nx_class: Add "NX_class" attribute when missing + :param has_nx_class: The "NX_class" attribute is defined in the parent + + :rtype dict: + """ + if not isinstance(treedict, Mapping): + raise TypeError("'treedict' must be a dictionary") + copy = dict() + for key, value in treedict.items(): + if "@" in key: + # HDF5 attribute + key = tuple(key.rsplit("@", 1)) + elif key.startswith(">"): + # HDF5 link + if isinstance(value, str): + key = key[1:] + first, sep, second = value.partition("::") + if sep: + value = h5py.ExternalLink(first, second) + else: + if ".." 
in first: + # Up-links not supported: make absolute + parts = [] + for p in list(parents) + first.split("/"): + if not p or p == ".": + continue + elif p == "..": + parts.pop(-1) + else: + parts.append(p) + first = "/" + "/".join(parts) + value = h5py.SoftLink(first) + elif is_link(value): + key = key[1:] + if isinstance(value, Mapping): + # HDF5 group + key_has_nx_class = add_nx_class and _has_nx_class(treedict, key) + copy[key] = nexus_to_h5_dict( + value, + parents=parents+(key,), + add_nx_class=add_nx_class, + has_nx_class=key_has_nx_class) + else: + # HDF5 dataset or link + copy[key] = value + if add_nx_class and not has_nx_class: + _ensure_nx_class(copy, parents) + return copy + + +def h5_to_nexus_dict(treedict): + """The following conversions are applied: + * 2-tuple key: converted to string ("@" notation) + * h5py.Softlink value: converted to string (">" key prefix) + * h5py.ExternalLink value: converted to string (">" key prefix) + + :param treedict: Nested dictionary/tree structure with strings as keys + and array-like objects as leafs. The ``"/"`` character can be used + to define sub tree. + + :rtype dict: + """ + copy = dict() + for key, value in treedict.items(): + if isinstance(key, tuple): + if len(key) != 2: + raise ValueError("HDF5 attribute must be described by 2 values") + key = "%s@%s" % (key[0], key[1]) + elif is_softlink(value): + key = ">" + key + value = value.path + elif is_externallink(value): + key = ">" + key + value = value.filename + "::" + value.path + if isinstance(value, Mapping): + copy[key] = h5_to_nexus_dict(value) + else: + copy[key] = value + return copy + + +def _name_contains_string_in_list(name, strlist): + if strlist is None: + return False + for filter_str in strlist: + if filter_str in name: + return True + return False + + +def _handle_error(mode: str, exception, msg: str, *args) -> None: + """Handle errors. 
+ + :param str mode: 'raise', 'log', 'ignore' + :param type exception: Exception class to use in 'raise' mode + :param str msg: Error message template + :param List[str] args: Arguments for error message template + """ + if mode == 'ignore': + return # no-op + elif mode == 'log': + logger.error(msg, *args) + elif mode == 'raise': + raise exception(msg % args) + else: + raise ValueError("Unsupported error handling: %s" % mode) + + +def h5todict(h5file, + path="/", + exclude_names=None, + asarray=True, + dereference_links=True, + include_attributes=False, + errors='raise'): + """Read a HDF5 file and return a nested dictionary with the complete file + structure and all data. + + Example of usage:: + + from silx.io.dictdump import h5todict + + # initialize dict with file header and scan header + header94 = h5todict("oleg.dat", + "/94.1/instrument/specfile") + # add positioners subdict + header94["positioners"] = h5todict("oleg.dat", + "/94.1/instrument/positioners") + # add scan data without mca data + header94["detector data"] = h5todict("oleg.dat", + "/94.1/measurement", + exclude_names="mca_") + + + .. note:: This function requires `h5py <http://www.h5py.org/>`_ to be + installed. + + .. note:: If you write a dictionary to a HDF5 file with + :func:`dicttoh5` and then read it back with :func:`h5todict`, data + types are not preserved. All values are cast to numpy arrays before + being written to file, and they are read back as numpy arrays (or + scalars). In some cases, you may find that a list of heterogeneous + data types is converted to a numpy array of strings. + + :param h5file: File name or h5py-like File, Group or Dataset + :param str path: Target path in the HDF5 file relative to ``h5file`` + :param List[str] exclude_names: Groups and datasets whose name contains + a string in this list will be ignored. 
Default is None (ignore nothing) + :param bool asarray: True (default) to read scalar as arrays, False to + read them as scalar + :param bool dereference_links: True (default) to dereference links, False + to preserve the link itself + :param bool include_attributes: False (default) + :param str errors: Handling of errors (HDF5 access issue, broken link,...): + - 'raise' (default): Raise an exception + - 'log': Log as errors + - 'ignore': Ignore errors + :return: Nested dictionary + """ + h5file, path = _normalize_h5_path(h5file, path) + with _SafeH5FileRead(h5file) as h5f: + ddict = {} + if path not in h5f: + _handle_error( + errors, KeyError, 'Path "%s" does not exist in file.', path) + return ddict + + try: + root = h5f[path] + except KeyError as e: + if not isinstance(h5f.get(path, getlink=True), h5py.HardLink): + _handle_error(errors, + KeyError, + 'Cannot retrieve path "%s" (broken link)', + path) + else: + _handle_error(errors, KeyError, ', '.join(e.args)) + return ddict + + # Read the attributes of the group + if include_attributes: + attrs = H5pyAttributesReadWrapper(root.attrs) + for aname, avalue in attrs.items(): + ddict[("", aname)] = avalue + # Read the children of the group + for key in root: + if _name_contains_string_in_list(key, exclude_names): + continue + h5name = path + "/" + key + # Preserve HDF5 link when requested + if not dereference_links: + lnk = h5f.get(h5name, getlink=True) + if is_link(lnk): + ddict[key] = lnk + continue + + try: + h5obj = h5f[h5name] + except KeyError as e: + if not isinstance(h5f.get(h5name, getlink=True), h5py.HardLink): + _handle_error(errors, + KeyError, + 'Cannot retrieve path "%s" (broken link)', + h5name) + else: + _handle_error(errors, KeyError, ', '.join(e.args)) + continue + + if is_group(h5obj): + # Child is an HDF5 group + ddict[key] = h5todict(h5f, + h5name, + exclude_names=exclude_names, + asarray=asarray, + dereference_links=dereference_links, + include_attributes=include_attributes) + else: + # Child 
is an HDF5 dataset + try: + data = h5py_read_dataset(h5obj) + except OSError: + _handle_error(errors, + OSError, + 'Cannot retrieve dataset "%s"', + h5name) + else: + if asarray: # Convert HDF5 dataset to numpy array + data = numpy.array(data, copy=False) + ddict[key] = data + # Read the attributes of the child + if include_attributes: + attrs = H5pyAttributesReadWrapper(h5obj.attrs) + for aname, avalue in attrs.items(): + ddict[(key, aname)] = avalue + return ddict + + +def dicttonx(treedict, h5file, h5path="/", add_nx_class=None, **kw): + """ + Write a nested dictionary to a HDF5 file, using string keys as member names. + The NeXus convention is used to identify attributes with ``"@"`` character, + therefore the dataset_names should not contain ``"@"``. + + Similarly, links are identified by keys starting with the ``">"`` character. + The corresponding value can be a soft or external link. + + :param treedict: Nested dictionary/tree structure with strings as keys + and array-like objects as leafs. The ``"/"`` character can be used + to define sub tree. The ``"@"`` character is used to write attributes. + The ``">"`` prefix is used to define links. + :param add_nx_class: Add "NX_class" attribute when missing. By default it + is ``True`` when ``update_mode`` is ``"add"`` or ``None``. + + The named parameters are passed to dicttoh5. 
+
+    Example::
+
+        import numpy
+        from silx.io.dictdump import dicttonx
+
+        gauss = {
+            "entry":{
+                "title":u"A plot of a gaussian",
+                "instrument": {
+                    "@NX_class": u"NXinstrument",
+                    "positioners": {
+                        "@NX_class": u"NXCollection",
+                        "x": numpy.arange(0,1.1,.1)
+                    }
+                },
+                "plot": {
+                    "y": numpy.array([0.08, 0.19, 0.39, 0.66, 0.9, 1.,
+                                      0.9, 0.66, 0.39, 0.19, 0.08]),
+                    ">x": "../instrument/positioners/x",
+                    "@signal": "y",
+                    "@axes": "x",
+                    "@NX_class":u"NXdata",
+                    "title": u"Gauss Plot",
+                },
+                "@NX_class": u"NXentry",
+                "default":"plot",
+            },
+            "@NX_class": u"NXroot",
+            "@default": "entry",
+        }
+
+        dicttonx(gauss,"test.h5")
+    """
+    h5file, h5path = _normalize_h5_path(h5file, h5path)
+    parents = tuple(p for p in h5path.split("/") if p)
+    if add_nx_class is None:
+        add_nx_class = kw.get("update_mode", None) in (None, "add")
+    nxtreedict = nexus_to_h5_dict(
+        treedict, parents=parents, add_nx_class=add_nx_class
+    )
+    dicttoh5(nxtreedict, h5file, h5path=h5path, **kw)
+
+
+def nxtodict(h5file, include_attributes=True, **kw):
+    """Read a HDF5 file and return a nested dictionary with the complete file
+    structure and all data.
+
+    As opposed to h5todict, all keys will be strings and no h5py objects are
+    present in the tree.
+
+    The named parameters are passed to h5todict.
+    """
+    nxtreedict = h5todict(h5file, include_attributes=include_attributes, **kw)
+    return h5_to_nexus_dict(nxtreedict)
+
+
+def dicttojson(ddict, jsonfile, indent=None, mode="w"):
+    """Serialize ``ddict`` as a JSON formatted stream to ``jsonfile``.
+
+    :param ddict: Dictionary (or any object compatible with ``json.dump``).
+    :param jsonfile: JSON file name or file-like object.
+        If a file name is provided, the function opens the file in the
+        specified mode and closes it again.
+    :param indent: If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that indent
+        level. An indent level of ``0`` will only insert newlines.
+ ``None`` (the default) selects the most compact representation. + :param mode: File opening mode (``w``, ``a``, ``w+``…) + """ + if not hasattr(jsonfile, "write"): + jsonf = open(jsonfile, mode) + else: + jsonf = jsonfile + + json.dump(ddict, jsonf, indent=indent) + + if not hasattr(jsonfile, "write"): + jsonf.close() + + +def dicttoini(ddict, inifile, mode="w"): + """Output dict as configuration file (similar to Microsoft Windows INI). + + :param dict: Dictionary of configuration parameters + :param inifile: INI file name or file-like object. + If a file name is provided, the function opens the file in the + specified mode and closes it again. + :param mode: File opening mode (``w``, ``a``, ``w+``…) + """ + if not hasattr(inifile, "write"): + inif = open(inifile, mode) + else: + inif = inifile + + ConfigDict(initdict=ddict).write(inif) + + if not hasattr(inifile, "write"): + inif.close() + + +def dump(ddict, ffile, mode="w", fmat=None): + """Dump dictionary to a file + + :param ddict: Dictionary with string keys + :param ffile: File name or file-like object with a ``write`` method + :param str fmat: Output format: ``"json"``, ``"hdf5"`` or ``"ini"``. + When None (the default), it uses the filename extension as the format. + Dumping to a HDF5 file requires `h5py <http://www.h5py.org/>`_ to be + installed. + :param str mode: File opening mode (``w``, ``a``, ``w+``…) + Default is *"w"*, write mode, overwrite if exists. + :raises IOError: if file format is not supported + """ + if fmat is None: + # If file-like object get its name, else use ffile as filename + filename = getattr(ffile, 'name', ffile) + fmat = os.path.splitext(filename)[1][1:] # Strip extension leading '.' 
+ fmat = fmat.lower() + + if fmat == "json": + dicttojson(ddict, ffile, indent=2, mode=mode) + elif fmat in ["hdf5", "h5"]: + dicttoh5(ddict, ffile, mode=mode) + elif fmat in ["ini", "cfg"]: + dicttoini(ddict, ffile, mode=mode) + else: + raise IOError("Unknown format " + fmat) + + +def load(ffile, fmat=None): + """Load dictionary from a file + + When loading from a JSON or INI file, an OrderedDict is returned to + preserve the values' insertion order. + + :param ffile: File name or file-like object with a ``read`` method + :param fmat: Input format: ``json``, ``hdf5`` or ``ini``. + When None (the default), it uses the filename extension as the format. + Loading from a HDF5 file requires `h5py <http://www.h5py.org/>`_ to be + installed. + :return: Dictionary (ordered dictionary for JSON and INI) + :raises IOError: if file format is not supported + """ + must_be_closed = False + if not hasattr(ffile, "read"): + f = open(ffile, "r") + fname = ffile + must_be_closed = True + else: + f = ffile + fname = ffile.name + + try: + if fmat is None: # Use file extension as format + fmat = os.path.splitext(fname)[1][1:] # Strip extension leading '.' 
+ fmat = fmat.lower() + + if fmat == "json": + return json.load(f, object_pairs_hook=OrderedDict) + if fmat in ["hdf5", "h5"]: + return h5todict(fname) + elif fmat in ["ini", "cfg"]: + return ConfigDict(filelist=[fname]) + else: + raise IOError("Unknown format " + fmat) + finally: + if must_be_closed: + f.close() diff --git a/src/silx/io/fabioh5.py b/src/silx/io/fabioh5.py new file mode 100755 index 0000000..af9b29a --- /dev/null +++ b/src/silx/io/fabioh5.py @@ -0,0 +1,1050 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""This module provides functions to read fabio images as an HDF5 file. + + >>> import silx.io.fabioh5 + >>> f = silx.io.fabioh5.File("foobar.edf") + +.. 
note:: This module has a dependency on the `h5py <http://www.h5py.org/>`_ + and `fabio <https://github.com/silx-kit/fabio>`_ libraries, + which are not mandatory dependencies for `silx`. + +""" + +import collections +import datetime +import logging +import numbers +import os + +import fabio.file_series +import numpy + +from . import commonh5 +from silx import version as silx_version +import silx.utils.number +import h5py + + +_logger = logging.getLogger(__name__) + + +_fabio_extensions = set([]) + + +def supported_extensions(): + """Returns all extensions supported by fabio. + + :returns: A set containing extensions like "*.edf". + :rtype: Set[str] + """ + global _fabio_extensions + if len(_fabio_extensions) > 0: + return _fabio_extensions + + formats = fabio.fabioformats.get_classes(reader=True) + all_extensions = set([]) + + for reader in formats: + if not hasattr(reader, "DEFAULT_EXTENSIONS"): + continue + + ext = reader.DEFAULT_EXTENSIONS + ext = ["*.%s" % e for e in ext] + all_extensions.update(ext) + + _fabio_extensions = set(all_extensions) + return _fabio_extensions + + +class _FileSeries(fabio.file_series.file_series): + """ + .. note:: Overwrite a function to fix an issue in fabio. 
+ """ + def jump(self, num): + """ + Goto a position in sequence + """ + assert num < len(self) and num >= 0, "num out of range" + self._current = num + return self[self._current] + + +class FrameData(commonh5.LazyLoadableDataset): + """Expose a cube of image from a Fabio file using `FabioReader` as + cache.""" + + def __init__(self, name, fabio_reader, parent=None): + if fabio_reader.is_spectrum(): + attrs = {"interpretation": "spectrum"} + else: + attrs = {"interpretation": "image"} + commonh5.LazyLoadableDataset.__init__(self, name, parent, attrs=attrs) + self.__fabio_reader = fabio_reader + self._shape = None + self._dtype = None + + def _create_data(self): + return self.__fabio_reader.get_data() + + def _update_cache(self): + if isinstance(self.__fabio_reader.fabio_file(), + fabio.file_series.file_series): + # Reading all the files is taking too much time + # Reach the information from the only first frame + first_image = self.__fabio_reader.fabio_file().first_image() + self._dtype = first_image.data.dtype + shape0 = self.__fabio_reader.frame_count() + shape1, shape2 = first_image.data.shape + self._shape = shape0, shape1, shape2 + else: + self._dtype = super(commonh5.LazyLoadableDataset, self).dtype + self._shape = super(commonh5.LazyLoadableDataset, self).shape + + @property + def dtype(self): + if self._dtype is None: + self._update_cache() + return self._dtype + + @property + def shape(self): + if self._shape is None: + self._update_cache() + return self._shape + + def __iter__(self): + for frame in self.__fabio_reader.iter_frames(): + yield frame.data + + def __getitem__(self, item): + # optimization for fetching a single frame if data not already loaded + if not self._is_initialized: + if isinstance(item, int) and \ + isinstance(self.__fabio_reader.fabio_file(), + fabio.file_series.file_series): + if item < 0: + # negative indexing + item += len(self) + return self.__fabio_reader.fabio_file().jump_image(item).data + return super(FrameData, 
self).__getitem__(item) + + +class RawHeaderData(commonh5.LazyLoadableDataset): + """Lazy loadable raw header""" + + def __init__(self, name, fabio_reader, parent=None): + commonh5.LazyLoadableDataset.__init__(self, name, parent) + self.__fabio_reader = fabio_reader + + def _create_data(self): + """Initialize hold data by merging all headers of each frames. + """ + headers = [] + types = set([]) + for fabio_frame in self.__fabio_reader.iter_frames(): + header = fabio_frame.header + + data = [] + for key, value in header.items(): + data.append("%s: %s" % (str(key), str(value))) + + data = "\n".join(data) + try: + line = data.encode("ascii") + types.add(numpy.string_) + except UnicodeEncodeError: + try: + line = data.encode("utf-8") + types.add(numpy.unicode_) + except UnicodeEncodeError: + # Fallback in void + line = numpy.void(data) + types.add(numpy.void) + + headers.append(line) + + if numpy.void in types: + dtype = numpy.void + elif numpy.unicode_ in types: + dtype = numpy.unicode_ + else: + dtype = numpy.string_ + + if dtype == numpy.unicode_: + # h5py only support vlen unicode + dtype = h5py.special_dtype(vlen=str) + + return numpy.array(headers, dtype=dtype) + + +class MetadataGroup(commonh5.LazyLoadableGroup): + """Abstract class for groups containing a reference to a fabio image. + """ + + def __init__(self, name, metadata_reader, kind, parent=None, attrs=None): + commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs) + self.__metadata_reader = metadata_reader + self.__kind = kind + + def _create_child(self): + keys = self.__metadata_reader.get_keys(self.__kind) + for name in keys: + data = self.__metadata_reader.get_value(self.__kind, name) + dataset = commonh5.Dataset(name, data) + self.add_node(dataset) + + @property + def _metadata_reader(self): + return self.__metadata_reader + + +class DetectorGroup(commonh5.LazyLoadableGroup): + """Define the detector group (sub group of instrument) using Fabio data. 
+ """ + + def __init__(self, name, fabio_reader, parent=None, attrs=None): + if attrs is None: + attrs = {"NX_class": "NXdetector"} + commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs) + self.__fabio_reader = fabio_reader + + def _create_child(self): + data = FrameData("data", self.__fabio_reader) + self.add_node(data) + + # TODO we should add here Nexus informations we can extract from the + # metadata + + others = MetadataGroup("others", self.__fabio_reader, kind=FabioReader.DEFAULT) + self.add_node(others) + + +class ImageGroup(commonh5.LazyLoadableGroup): + """Define the image group (sub group of measurement) using Fabio data. + """ + + def __init__(self, name, fabio_reader, parent=None, attrs=None): + commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs) + self.__fabio_reader = fabio_reader + + def _create_child(self): + basepath = self.parent.parent.name + data = commonh5.SoftLink("data", path=basepath + "/instrument/detector_0/data") + self.add_node(data) + detector = commonh5.SoftLink("info", path=basepath + "/instrument/detector_0") + self.add_node(detector) + + +class NxDataPreviewGroup(commonh5.LazyLoadableGroup): + """Define the NxData group which is used as the default NXdata to show the + content of the file. + """ + + def __init__(self, name, fabio_reader, parent=None): + if fabio_reader.is_spectrum(): + interpretation = "spectrum" + else: + interpretation = "image" + attrs = { + "NX_class": "NXdata", + "interpretation": interpretation, + "signal": "data", + } + commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs) + self.__fabio_reader = fabio_reader + + def _create_child(self): + basepath = self.parent.name + data = commonh5.SoftLink("data", path=basepath + "/instrument/detector_0/data") + self.add_node(data) + + +class SampleGroup(commonh5.LazyLoadableGroup): + """Define the image group (sub group of measurement) using Fabio data. 
+ """ + + def __init__(self, name, fabio_reader, parent=None): + attrs = {"NXclass": "NXsample"} + commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs) + self.__fabio_reader = fabio_reader + + def _create_child(self): + if self.__fabio_reader.has_ub_matrix(): + scalar = {"interpretation": "scalar"} + data = self.__fabio_reader.get_unit_cell_abc() + data = commonh5.Dataset("unit_cell_abc", data, attrs=scalar) + self.add_node(data) + unit_cell_data = numpy.zeros((1, 6), numpy.float32) + unit_cell_data[0, :3] = data + data = self.__fabio_reader.get_unit_cell_alphabetagamma() + data = commonh5.Dataset("unit_cell_alphabetagamma", data, attrs=scalar) + self.add_node(data) + unit_cell_data[0, 3:] = data + data = commonh5.Dataset("unit_cell", unit_cell_data, attrs=scalar) + self.add_node(data) + data = self.__fabio_reader.get_ub_matrix() + data = commonh5.Dataset("ub_matrix", data, attrs=scalar) + self.add_node(data) + + +class MeasurementGroup(commonh5.LazyLoadableGroup): + """Define the measurement group for fabio file. 
+ """ + + def __init__(self, name, fabio_reader, parent=None, attrs=None): + commonh5.LazyLoadableGroup.__init__(self, name, parent, attrs) + self.__fabio_reader = fabio_reader + + def _create_child(self): + keys = self.__fabio_reader.get_keys(FabioReader.COUNTER) + + # create image measurement but take care that no other metadata use + # this name + for i in range(1000): + name = "image_%i" % i + if name not in keys: + data = ImageGroup(name, self.__fabio_reader) + self.add_node(data) + break + else: + raise Exception("image_i for 0..1000 already used") + + # add all counters + for name in keys: + data = self.__fabio_reader.get_value(FabioReader.COUNTER, name) + dataset = commonh5.Dataset(name, data) + self.add_node(dataset) + + +class FabioReader(object): + """Class which read and cache data and metadata from a fabio image.""" + + DEFAULT = 0 + COUNTER = 1 + POSITIONER = 2 + + def __init__(self, file_name=None, fabio_image=None, file_series=None): + """ + Constructor + + :param str file_name: File name of the image file to read + :param fabio.fabioimage.FabioImage fabio_image: An already openned + :class:`fabio.fabioimage.FabioImage` instance. 
+ :param Union[list[str],fabio.file_series.file_series] file_series: An + list of file name or a :class:`fabio.file_series.file_series` + instance + """ + self.__at_least_32bits = False + self.__signed_type = False + + self.__load(file_name, fabio_image, file_series) + self.__counters = {} + self.__positioners = {} + self.__measurements = {} + self.__key_filters = set([]) + self.__data = None + self.__frame_count = self.frame_count() + self._read() + + def __load(self, file_name=None, fabio_image=None, file_series=None): + if file_name is not None and fabio_image: + raise TypeError("Parameters file_name and fabio_image are mutually exclusive.") + if file_name is not None and fabio_image: + raise TypeError("Parameters fabio_image and file_series are mutually exclusive.") + + self.__must_be_closed = False + + if file_name is not None: + self.__fabio_file = fabio.open(file_name) + self.__must_be_closed = True + elif fabio_image is not None: + if isinstance(fabio_image, fabio.fabioimage.FabioImage): + self.__fabio_file = fabio_image + else: + raise TypeError("FabioImage expected but %s found.", fabio_image.__class__) + elif file_series is not None: + if isinstance(file_series, list): + self.__fabio_file = _FileSeries(file_series) + elif isinstance(file_series, fabio.file_series.file_series): + self.__fabio_file = file_series + else: + raise TypeError("file_series or list expected but %s found.", file_series.__class__) + + def close(self): + """Close the object, and free up associated resources. + + The associated FabioImage is closed only if the object was created from + a filename by this class itself. + + After calling this method, attempts to use the object (and children) + may fail. 
+ """ + if self.__must_be_closed: + # Make sure the API of fabio provide it a 'close' method + # TODO the test can be removed if fabio version >= 0.8 + if hasattr(self.__fabio_file, "close"): + self.__fabio_file.close() + self.__fabio_file = None + + def fabio_file(self): + return self.__fabio_file + + def frame_count(self): + """Returns the number of frames available.""" + if isinstance(self.__fabio_file, fabio.file_series.file_series): + return len(self.__fabio_file) + elif isinstance(self.__fabio_file, fabio.fabioimage.FabioImage): + return self.__fabio_file.nframes + else: + raise TypeError("Unsupported type %s", self.__fabio_file.__class__) + + def iter_frames(self): + """Iter all the available frames. + + A frame provides at least `data` and `header` attributes. + """ + if isinstance(self.__fabio_file, fabio.file_series.file_series): + for file_number in range(len(self.__fabio_file)): + with self.__fabio_file.jump_image(file_number) as fabio_image: + # return the first frame only + assert(fabio_image.nframes == 1) + yield fabio_image + elif isinstance(self.__fabio_file, fabio.fabioimage.FabioImage): + for frame_count in range(self.__fabio_file.nframes): + if self.__fabio_file.nframes == 1: + yield self.__fabio_file + else: + yield self.__fabio_file.getframe(frame_count) + else: + raise TypeError("Unsupported type %s", self.__fabio_file.__class__) + + def _create_data(self): + """Initialize hold data by merging all frames into a single cube. + + Choose the cube size which fit the best the data. If some images are + smaller than expected, the empty space is set to 0. + + The computation is cached into the class, and only done ones. 
+ """ + images = [] + for fabio_frame in self.iter_frames(): + images.append(fabio_frame.data) + + # returns the data without extra dim in case of single frame + if len(images) == 1: + return images[0] + + # get the max size + max_dim = max([i.ndim for i in images]) + max_shape = [0] * max_dim + for image in images: + for dim in range(image.ndim): + if image.shape[dim] > max_shape[dim]: + max_shape[dim] = image.shape[dim] + max_shape = tuple(max_shape) + + # fix smallest images + for index, image in enumerate(images): + if image.shape == max_shape: + continue + location = [slice(0, i) for i in image.shape] + while len(location) < max_dim: + location.append(0) + normalized_image = numpy.zeros(max_shape, dtype=image.dtype) + normalized_image[tuple(location)] = image + images[index] = normalized_image + + # create a cube + return numpy.array(images) + + def __get_dict(self, kind): + """Returns a dictionary from according to an expected kind""" + if kind == self.DEFAULT: + return self.__measurements + elif kind == self.COUNTER: + return self.__counters + elif kind == self.POSITIONER: + return self.__positioners + else: + raise Exception("Unexpected kind %s", kind) + + def get_data(self): + """Returns a cube from all available data from frames + + :rtype: numpy.ndarray + """ + if self.__data is None: + self.__data = self._create_data() + return self.__data + + def get_keys(self, kind): + """Get all available keys according to a kind of metadata. + + :rtype: list + """ + return self.__get_dict(kind).keys() + + def get_value(self, kind, name): + """Get a metadata value according to the kind and the name. 
+ + :rtype: numpy.ndarray + """ + value = self.__get_dict(kind)[name] + if not isinstance(value, numpy.ndarray): + if kind in [self.COUNTER, self.POSITIONER]: + # Force normalization for counters and positioners + old = self._set_vector_normalization(at_least_32bits=True, signed_type=True) + else: + old = None + value = self._convert_metadata_vector(value) + self.__get_dict(kind)[name] = value + if old is not None: + self._set_vector_normalization(*old) + return value + + def _set_counter_value(self, frame_id, name, value): + """Set a counter metadata according to the frame id""" + if name not in self.__counters: + self.__counters[name] = [None] * self.__frame_count + self.__counters[name][frame_id] = value + + def _set_positioner_value(self, frame_id, name, value): + """Set a positioner metadata according to the frame id""" + if name not in self.__positioners: + self.__positioners[name] = [None] * self.__frame_count + self.__positioners[name][frame_id] = value + + def _set_measurement_value(self, frame_id, name, value): + """Set a measurement metadata according to the frame id""" + if name not in self.__measurements: + self.__measurements[name] = [None] * self.__frame_count + self.__measurements[name][frame_id] = value + + def _enable_key_filters(self, fabio_file): + self.__key_filters.clear() + if hasattr(fabio_file, "RESERVED_HEADER_KEYS"): + # Provided in fabio 0.5 + for key in fabio_file.RESERVED_HEADER_KEYS: + self.__key_filters.add(key.lower()) + + def _read(self): + """Read all metadata from the fabio file and store it into this + object.""" + + file_series = isinstance(self.__fabio_file, fabio.file_series.file_series) + if not file_series: + self._enable_key_filters(self.__fabio_file) + + for frame_id, fabio_frame in enumerate(self.iter_frames()): + if file_series: + self._enable_key_filters(fabio_frame) + self._read_frame(frame_id, fabio_frame.header) + + def _is_filtered_key(self, key): + """ + If this function returns True, the :meth:`_read_key` while 
not be + called with this `key`while reading the metatdata frame. + + :param str key: A key of the metadata + :rtype: bool + """ + return key.lower() in self.__key_filters + + def _read_frame(self, frame_id, header): + """Read all metadata from a frame and store it into this + object.""" + for key, value in header.items(): + if self._is_filtered_key(key): + continue + self._read_key(frame_id, key, value) + + def _read_key(self, frame_id, name, value): + """Read a key from the metadata and cache it into this object.""" + self._set_measurement_value(frame_id, name, value) + + def _set_vector_normalization(self, at_least_32bits, signed_type): + previous = self.__at_least_32bits, self.__signed_type + self.__at_least_32bits = at_least_32bits + self.__signed_type = signed_type + return previous + + def _normalize_vector_type(self, dtype): + """Normalize the """ + if self.__at_least_32bits: + if numpy.issubdtype(dtype, numpy.signedinteger): + dtype = numpy.result_type(dtype, numpy.uint32) + if numpy.issubdtype(dtype, numpy.unsignedinteger): + dtype = numpy.result_type(dtype, numpy.uint32) + elif numpy.issubdtype(dtype, numpy.floating): + dtype = numpy.result_type(dtype, numpy.float32) + elif numpy.issubdtype(dtype, numpy.complexfloating): + dtype = numpy.result_type(dtype, numpy.complex64) + if self.__signed_type: + if numpy.issubdtype(dtype, numpy.unsignedinteger): + signed = numpy.dtype("%s%i" % ('i', dtype.itemsize)) + dtype = numpy.result_type(dtype, signed) + return dtype + + def _convert_metadata_vector(self, values): + """Convert a list of numpy data into a numpy array with the better + fitting type.""" + converted = [] + types = set([]) + has_none = False + is_array = False + array = [] + + for v in values: + if v is None: + converted.append(None) + has_none = True + array.append(None) + else: + c = self._convert_value(v) + if c.shape != tuple(): + array.append(v.split(" ")) + is_array = True + else: + array.append(v) + converted.append(c) + types.add(c.dtype) + + 
if has_none and len(types) == 0: + # That's a list of none values + return numpy.array([0] * len(values), numpy.int8) + + result_type = numpy.result_type(*types) + + if issubclass(result_type.type, numpy.string_): + # use the raw data to create the array + result = values + elif issubclass(result_type.type, numpy.unicode_): + # use the raw data to create the array + result = values + else: + result = converted + + result_type = self._normalize_vector_type(result_type) + + if has_none: + # Fix missing data according to the array type + if result_type.kind == "S": + none_value = b"" + elif result_type.kind == "U": + none_value = u"" + elif result_type.kind == "f": + none_value = numpy.float64("NaN") + elif result_type.kind == "i": + none_value = numpy.int64(0) + elif result_type.kind == "u": + none_value = numpy.int64(0) + elif result_type.kind == "b": + none_value = numpy.bool_(False) + else: + none_value = None + + for index, r in enumerate(result): + if r is not None: + continue + result[index] = none_value + values[index] = none_value + array[index] = none_value + + if result_type.kind in "uifd" and len(types) > 1 and len(values) > 1: + # Catch numerical precision + if is_array and len(array) > 1: + return numpy.array(array, dtype=result_type) + else: + return numpy.array(values, dtype=result_type) + return numpy.array(result, dtype=result_type) + + def _convert_value(self, value): + """Convert a string into a numpy object (scalar or array). + + The value is most of the time a string, but it can be python object + in case if TIFF decoder for example. 
+ """ + if isinstance(value, list): + # convert to a numpy array + return numpy.array(value) + if isinstance(value, dict): + # convert to a numpy associative array + key_dtype = numpy.min_scalar_type(list(value.keys())) + value_dtype = numpy.min_scalar_type(list(value.values())) + associative_type = [('key', key_dtype), ('value', value_dtype)] + assert key_dtype.kind != "O" and value_dtype.kind != "O" + return numpy.array(list(value.items()), dtype=associative_type) + if isinstance(value, numbers.Number): + dtype = numpy.min_scalar_type(value) + assert dtype.kind != "O" + return dtype.type(value) + + if isinstance(value, bytes): + try: + value = value.decode('utf-8') + except UnicodeDecodeError: + return numpy.void(value) + + if " " in value: + result = self._convert_list(value) + else: + result = self._convert_scalar_value(value) + return result + + def _convert_scalar_value(self, value): + """Convert a string into a numpy int or float. + + If it is not possible it returns a numpy string. + """ + try: + numpy_type = silx.utils.number.min_numerical_convertible_type(value) + converted = numpy_type(value) + except ValueError: + converted = numpy.string_(value) + return converted + + def _convert_list(self, value): + """Convert a string into a typed numpy array. + + If it is not possible it returns a numpy string. 
+ """ + try: + numpy_values = [] + values = value.split(" ") + types = set([]) + for string_value in values: + v = self._convert_scalar_value(string_value) + numpy_values.append(v) + types.add(v.dtype.type) + + result_type = numpy.result_type(*types) + + if issubclass(result_type.type, (numpy.string_, bytes)): + # use the raw data to create the result + return numpy.string_(value) + elif issubclass(result_type.type, (numpy.unicode_, str)): + # use the raw data to create the result + return numpy.unicode_(value) + else: + if len(types) == 1: + return numpy.array(numpy_values, dtype=result_type) + else: + return numpy.array(values, dtype=result_type) + except ValueError: + return numpy.string_(value) + + def has_sample_information(self): + """Returns true if there is information about the sample in the + file + + :rtype: bool + """ + return self.has_ub_matrix() + + def has_ub_matrix(self): + """Returns true if a UB matrix is available. + + :rtype: bool + """ + return False + + def is_spectrum(self): + """Returns true if the data should be interpreted as + MCA data. + + :rtype: bool + """ + return False + + +class EdfFabioReader(FabioReader): + """Class which read and cache data and metadata from a fabio image. + + It is mostly the same as FabioReader, but counter_mne and + motor_mne are parsed using a special way. 
+ """ + + def __init__(self, file_name=None, fabio_image=None, file_series=None): + FabioReader.__init__(self, file_name, fabio_image, file_series) + self.__unit_cell_abc = None + self.__unit_cell_alphabetagamma = None + self.__ub_matrix = None + + def _read_frame(self, frame_id, header): + """Overwrite the method to check and parse special keys: counter and + motors keys.""" + self.__catch_keys = set([]) + if "motor_pos" in header and "motor_mne" in header: + self.__catch_keys.add("motor_pos") + self.__catch_keys.add("motor_mne") + self._read_mnemonic_key(frame_id, "motor", header) + if "counter_pos" in header and "counter_mne" in header: + self.__catch_keys.add("counter_pos") + self.__catch_keys.add("counter_mne") + self._read_mnemonic_key(frame_id, "counter", header) + FabioReader._read_frame(self, frame_id, header) + + def _is_filtered_key(self, key): + if key in self.__catch_keys: + return True + return FabioReader._is_filtered_key(self, key) + + def _get_mnemonic_key(self, base_key, header): + mnemonic_values_key = base_key + "_mne" + mnemonic_values = header.get(mnemonic_values_key, "") + mnemonic_values = mnemonic_values.split() + pos_values_key = base_key + "_pos" + pos_values = header.get(pos_values_key, "") + pos_values = pos_values.split() + + result = collections.OrderedDict() + nbitems = max(len(mnemonic_values), len(pos_values)) + for i in range(nbitems): + if i < len(mnemonic_values): + mnemonic = mnemonic_values[i] + else: + # skip the element + continue + + if i < len(pos_values): + pos = pos_values[i] + else: + pos = None + + result[mnemonic] = pos + return result + + def _read_mnemonic_key(self, frame_id, base_key, header): + """Parse a mnemonic key""" + is_counter = base_key == "counter" + is_positioner = base_key == "motor" + data = self._get_mnemonic_key(base_key, header) + + for mnemonic, pos in data.items(): + if is_counter: + self._set_counter_value(frame_id, mnemonic, pos) + elif is_positioner: + self._set_positioner_value(frame_id, 
mnemonic, pos) + else: + raise Exception("State unexpected (base_key: %s)" % base_key) + + def _get_first_header(self): + """ + ..note:: This function can be cached + """ + fabio_file = self.fabio_file() + if isinstance(fabio_file, fabio.file_series.file_series): + return fabio_file.jump_image(0).header + return fabio_file.header + + def has_ub_matrix(self): + """Returns true if a UB matrix is available. + + :rtype: bool + """ + header = self._get_first_header() + expected_keys = set(["UB_mne", "UB_pos", "sample_mne", "sample_pos"]) + return expected_keys.issubset(header) + + def parse_ub_matrix(self): + header = self._get_first_header() + ub_data = self._get_mnemonic_key("UB", header) + s_data = self._get_mnemonic_key("sample", header) + if len(ub_data) > 9: + _logger.warning("UB_mne and UB_pos contains more than expected keys.") + if len(s_data) > 6: + _logger.warning("sample_mne and sample_pos contains more than expected keys.") + + data = numpy.array([s_data["U0"], s_data["U1"], s_data["U2"]], dtype=float) + unit_cell_abc = data + + data = numpy.array([s_data["U3"], s_data["U4"], s_data["U5"]], dtype=float) + unit_cell_alphabetagamma = data + + ub_matrix = numpy.array([[ + [ub_data["UB0"], ub_data["UB1"], ub_data["UB2"]], + [ub_data["UB3"], ub_data["UB4"], ub_data["UB5"]], + [ub_data["UB6"], ub_data["UB7"], ub_data["UB8"]]]], dtype=float) + + self.__unit_cell_abc = unit_cell_abc + self.__unit_cell_alphabetagamma = unit_cell_alphabetagamma + self.__ub_matrix = ub_matrix + + def get_unit_cell_abc(self): + """Get a numpy array data as defined for the dataset unit_cell_abc + from the NXsample dataset. + + :rtype: numpy.ndarray + """ + if self.__unit_cell_abc is None: + self.parse_ub_matrix() + return self.__unit_cell_abc + + def get_unit_cell_alphabetagamma(self): + """Get a numpy array data as defined for the dataset + unit_cell_alphabetagamma from the NXsample dataset. 
+ + :rtype: numpy.ndarray + """ + if self.__unit_cell_alphabetagamma is None: + self.parse_ub_matrix() + return self.__unit_cell_alphabetagamma + + def get_ub_matrix(self): + """Get a numpy array data as defined for the dataset ub_matrix + from the NXsample dataset. + + :rtype: numpy.ndarray + """ + if self.__ub_matrix is None: + self.parse_ub_matrix() + return self.__ub_matrix + + def is_spectrum(self): + """Returns true if the data should be interpreted as + MCA data. + EDF files or file series, with two or more header names starting with + "MCA", should be interpreted as MCA data. + + :rtype: bool + """ + count = 0 + for key in self._get_first_header(): + if key.lower().startswith("mca"): + count += 1 + if count >= 2: + return True + return False + + +class File(commonh5.File): + """Class which handle a fabio image as a mimick of a h5py.File. + """ + + def __init__(self, file_name=None, fabio_image=None, file_series=None): + """ + Constructor + + :param str file_name: File name of the image file to read + :param fabio.fabioimage.FabioImage fabio_image: An already openned + :class:`fabio.fabioimage.FabioImage` instance. + :param Union[list[str],fabio.file_series.file_series] file_series: An + list of file name or a :class:`fabio.file_series.file_series` + instance + """ + self.__fabio_reader = self.create_fabio_reader(file_name, fabio_image, file_series) + if fabio_image is not None: + file_name = fabio_image.filename + scan = self.create_scan_group(self.__fabio_reader) + + attrs = {"NX_class": "NXroot", + "file_time": datetime.datetime.now().isoformat(), + "creator": "silx %s" % silx_version, + "default": scan.basename} + if file_name is not None: + attrs["file_name"] = file_name + commonh5.File.__init__(self, name=file_name, attrs=attrs) + self.add_node(scan) + + def create_scan_group(self, fabio_reader): + """Factory to create the scan group. 
+ + :param FabioImage fabio_image: A Fabio image + :param FabioReader fabio_reader: A reader for the Fabio image + :rtype: commonh5.Group + """ + nxdata = NxDataPreviewGroup("image", fabio_reader) + scan_attrs = { + "NX_class": "NXentry", + "default": nxdata.basename, + } + scan = commonh5.Group("scan_0", attrs=scan_attrs) + instrument = commonh5.Group("instrument", attrs={"NX_class": "NXinstrument"}) + measurement = MeasurementGroup("measurement", fabio_reader, attrs={"NX_class": "NXcollection"}) + file_ = commonh5.Group("file", attrs={"NX_class": "NXcollection"}) + positioners = MetadataGroup("positioners", fabio_reader, FabioReader.POSITIONER, attrs={"NX_class": "NXpositioner"}) + raw_header = RawHeaderData("scan_header", fabio_reader, self) + detector = DetectorGroup("detector_0", fabio_reader) + + scan.add_node(instrument) + instrument.add_node(positioners) + instrument.add_node(file_) + instrument.add_node(detector) + file_.add_node(raw_header) + scan.add_node(measurement) + scan.add_node(nxdata) + + if fabio_reader.has_sample_information(): + sample = SampleGroup("sample", fabio_reader) + scan.add_node(sample) + + return scan + + def create_fabio_reader(self, file_name, fabio_image, file_series): + """Factory to create fabio reader. 
+ + :rtype: FabioReader""" + use_edf_reader = False + first_file_name = None + first_image = None + + if isinstance(file_series, list): + first_file_name = file_series[0] + elif isinstance(file_series, fabio.file_series.file_series): + first_image = file_series.first_image() + elif fabio_image is not None: + first_image = fabio_image + else: + first_file_name = file_name + + if first_file_name is not None: + _, ext = os.path.splitext(first_file_name) + ext = ext[1:] + edfimage = fabio.edfimage.EdfImage + if hasattr(edfimage, "DEFAULT_EXTENTIONS"): + # Typo on fabio 0.5 + edf_extensions = edfimage.DEFAULT_EXTENTIONS + else: + edf_extensions = edfimage.DEFAULT_EXTENSIONS + use_edf_reader = ext in edf_extensions + elif first_image is not None: + use_edf_reader = isinstance(first_image, fabio.edfimage.EdfImage) + else: + assert(False) + + if use_edf_reader: + reader = EdfFabioReader(file_name, fabio_image, file_series) + else: + reader = FabioReader(file_name, fabio_image, file_series) + return reader + + def close(self): + """Close the object, and free up associated resources. + + After calling this method, attempts to use the object (and children) + may fail. 
+ """ + self.__fabio_reader.close() + self.__fabio_reader = None diff --git a/src/silx/io/fioh5.py b/src/silx/io/fioh5.py new file mode 100644 index 0000000..75fe587 --- /dev/null +++ b/src/silx/io/fioh5.py @@ -0,0 +1,490 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2021 Timo Fuchs +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""This module provides a h5py-like API to access FioFile data. + +API description ++++++++++++++++ + +Fiofile data structure exposed by this API: + +:: + + / + n.1/ + title = "…" + start_time = "…" + instrument/ + fiofile/ + comments = "…" + parameter = "…" + comment = "…" + parameter/ + parameter_name = value + + measurement/ + colname0 = … + colname1 = … + … + + +The top level scan number ``n.1`` is determined from the filename as in +``prefix_n.fio``. (e.g. 
``eh1_sixc_00045.fio`` would give ``45.1``) +If no number is available, will use the filename instead. + +``comments`` and ``parameter`` in group ``fiofile`` are the raw headers as they +appear in the original file, as a string of lines separated by newline +(``\\n``) characters. ``comment`` are the remaining comments, +which were not parsed. + + + +The title is the content of the first comment header line +(e.g ``"ascan ss1vo -4.55687 -0.556875 40 0.2"``). +The start_time is parsed from the second comment line. + +Datasets are stored in the data format specified in the fio file header. + +Scan data (e.g. ``/1.1/measurement/colname0``) is accessed by column, +the dataset name ``colname0`` being the column label as defined in the +``Col …`` header line. + +If a ``/`` character is present in a column label or in a motor name in the +original FIO file, it will be substituted with a ``%`` character in the +corresponding dataset name. + +MCA data is not yet supported. + +This reader requires a fio file as defined in +src/sardana/macroserver/recorders/storage.py of the Sardana project +(https://github.com/sardana-org/sardana). + + +Accessing data +++++++++++++++ + +Data and groups are accessed in :mod:`h5py` fashion:: + + from silx.io.fioh5 import FioH5 + + # Open a FioFile + fiofh5 = FioH5("test_00056.fio") + + # using FioH5 as a regular group to access scans + scan1group = fiofh5["56.1"] + instrument_group = scan1group["instrument"] + + # alternative: full path access + measurement_group = fiofh5["/56.1/measurement"] + + # accessing a scan data column by name as a 1D numpy array + data_array = measurement_group["Pslit HGap"] + + +:class:`FioH5` files and groups provide a :meth:`keys` method:: + + >>> fiofh5.keys() + ['96.1', '97.1', '98.1'] + >>> fiofh5['96.1'].keys() + ['title', 'start_time', 'instrument', 'measurement'] + +They can also be treated as iterators: + +.. 
ABORTLINENO = 5


def is_fiofile(filename):
    """Return True if *filename* looks like a FIO file.

    A file qualifies when three consecutive lines near its start begin
    with ``!``; at most ABORTLINENO leading lines are examined.

    :param str filename: File path
    :return: *True* if file is a FIO file, *False* if it is not a FIO file
    :rtype: bool
    """
    if not os.path.isfile(filename):
        return False
    # only the head of the file is needed for the test
    with open(filename, "rb") as fileobj:
        head = fileobj.read(2500)
    consecutive = 0
    for lineno, raw_line in enumerate(head.split(b"\n")):
        consecutive = consecutive + 1 if raw_line.startswith(b"!") else 0
        if consecutive >= 3:
            return True
        if lineno >= ABORTLINENO:
            break
    return False
+ + """ + + def __init__(self, filepath): + # parse filename + filename = os.path.basename(filepath) + fnowithsuffix = filename.split('_')[-1] + try: + self.scanno = int(fnowithsuffix.split('.')[0]) + except Exception: + self.scanno = None + logger1.warning("Cannot parse scan number of file %s", filename) + + with open(filepath, 'r') as fiof: + + prev = 0 + line_counter = 0 + + while(True): + line = fiof.readline() + if line.startswith('!'): # skip comments + prev = fiof.tell() + line_counter = 0 + continue + if line.startswith('%c'): # comment section + line_counter = 0 + self.commentsection = '' + line = fiof.readline() + while(not line.startswith('%') + and not line.startswith('!')): + self.commentsection += line + prev = fiof.tell() + line = fiof.readline() + if line.startswith('%p'): # parameter section + line_counter = 0 + self.parameterssection = '' + line = fiof.readline() + while(not line.startswith('%') + and not line.startswith('!')): + self.parameterssection += line + prev = fiof.tell() + line = fiof.readline() + if line.startswith('%d'): # data type definitions + line_counter = 0 + self.datacols = [] + self.names = [] + self.dtypes = [] + line = fiof.readline() + while(line.startswith(' Col')): + splitline = line.split() + name = splitline[-2] + self.names.append(name) + dtype = dtypeConverter[splitline[-1]] + self.dtypes.append(dtype) + self.datacols.append((name, dtype)) + prev = fiof.tell() + line = fiof.readline() + fiof.seek(prev) + break + + line_counter += 1 + if line_counter > ABORTLINENO: + raise IOError("Invalid fio file: Found no data " + "after %s lines" % ABORTLINENO) + + self.data = numpy.loadtxt(fiof, + dtype={'names': tuple(self.names), + 'formats': tuple(self.dtypes)}, + comments="!") + + # ToDo: read only last line of file, + # which sometimes contains the end of acquisition timestamp. 
+ + self.parameter = {} + + # parse parameter section: + try: + for line in self.parameterssection.splitlines(): + param, value = line.split(' = ') + self.parameter[param] = value + except Exception: + logger1.warning("Cannot parse parameter section") + + # parse default sardana comments: username and start time + try: + acquiMarker = "acquisition started at" # indicates timestamp + commentlines = self.commentsection.splitlines() + if len(commentlines) >= 2: + self.title = commentlines[0] + l2 = commentlines[1] + acqpos = l2.lower().find(acquiMarker) + if acqpos < 0: + raise Exception("acquisition str not found") + + self.user = l2[:acqpos][4:].strip() + self.start_time = l2[acqpos+len(acquiMarker):].strip() + commentlines = commentlines[2:] + self.comments = "\n".join(commentlines[2:]) + + except Exception: + logger1.warning("Cannot parse default comment section") + self.comments = self.commentsection + self.user = "" + self.start_time = "" + self.title = "" + + +class FioH5NodeDataset(commonh5.Dataset): + """This class inherits :class:`commonh5.Dataset`, to which it adds + little extra functionality. The main additional functionality is the + proxy behavior that allows to mimic the numpy array stored in this + class. 
class FioH5(commonh5.File):
    """This class reads a FIO file and exposes it as a *h5py.File*.

    It inherits :class:`silx.io.commonh5.Group` (via :class:`commonh5.File`),
    which implements most of its API.
    """

    def __init__(self, filename, order=1):
        """
        :param filename: Path to FioFile in filesystem
        :type filename: str
        :param int order: Repetition count used to build the scan key
            ("<scanno>.<order>"); defaults to 1
        :raises IOError: if the file is not a FIO file or cannot be parsed
        """
        if isinstance(filename, io.IOBase):
            # see https://github.com/silx-kit/silx/issues/858
            filename = filename.name

        if not is_fiofile(filename):
            raise IOError("File %s is not a FIO file." % filename)

        try:
            fiof = FioFile(filename)  # reads complete file
        except Exception as e:
            # BUG FIX: the "%s" placeholder was never substituted, so the
            # message contained a literal "%s" instead of the file path
            raise IOError("FIO file %s cannot be read." % filename) from e

        attrs = {"NX_class": to_h5py_utf8("NXroot"),
                 "file_time": to_h5py_utf8(
                     datetime.datetime.now().isoformat()),
                 "file_name": to_h5py_utf8(filename),
                 "creator": to_h5py_utf8("silx fioh5 %s" % silx_version)}
        commonh5.File.__init__(self, filename, attrs=attrs)

        if fiof.scanno is not None:
            scan_key = "%s.%s" % (fiof.scanno, int(order))
        else:
            # no scan number in the file name: fall back to the bare name
            scan_key = os.path.splitext(os.path.basename(filename))[0]

        scan_group = FioScanGroup(scan_key, parent=self, scan=fiof)
        self.add_node(scan_group)
+ attrs={"NX_class": to_h5py_utf8("NXcollection")}) + + for label in scan.names: + safe_label = label.replace("/", "%") + self.add_node(FioH5NodeDataset(name=safe_label, + data=scan.data[label], + parent=self)) + + +class FioInstrumentGroup(commonh5.Group): + def __init__(self, parent, scan): + """ + + :param parent: parent Group + :param scan: FioFile object + """ + commonh5.Group.__init__(self, name="instrument", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXinstrument")}) + + self.add_node(FioParameterGroup(parent=self, scan=scan)) + self.add_node(FioFileGroup(parent=self, scan=scan)) + self.add_node(FioH5NodeDataset(name="comment", + data=to_h5py_utf8(scan.comments), + parent=self)) + + +class FioFileGroup(commonh5.Group): + def __init__(self, parent, scan): + """ + + :param parent: parent Group + :param scan: FioFile object + """ + commonh5.Group.__init__(self, name="fiofile", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXcollection")}) + + self.add_node(FioH5NodeDataset(name="comments", + data=to_h5py_utf8(scan.commentsection), + parent=self)) + + self.add_node(FioH5NodeDataset(name="parameter", + data=to_h5py_utf8(scan.parameterssection), + parent=self)) + + +class FioParameterGroup(commonh5.Group): + def __init__(self, parent, scan): + """ + + :param parent: parent Group + :param scan: FioFile object + """ + commonh5.Group.__init__(self, name="parameter", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXcollection")}) + + for label in scan.parameter: + safe_label = label.replace("/", "%") + self.add_node(FioH5NodeDataset(name=safe_label, + data=to_h5py_utf8(scan.parameter[label]), + parent=self)) diff --git a/src/silx/io/h5py_utils.py b/src/silx/io/h5py_utils.py new file mode 100644 index 0000000..fb04152 --- /dev/null +++ b/src/silx/io/h5py_utils.py @@ -0,0 +1,440 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# 
Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +""" +This module provides utility methods on top of h5py, mainly to handle +parallel writing and reading. +""" + +__authors__ = ["W. 
de Nolf"] +__license__ = "MIT" +__date__ = "27/01/2020" + + +import os +import sys +import traceback +import logging +import h5py + +from .._version import calc_hexversion +from ..utils import retry as retry_mod +from silx.utils.deprecation import deprecated_warning + +_logger = logging.getLogger(__name__) + +IS_WINDOWS = sys.platform == "win32" + +H5PY_HEX_VERSION = calc_hexversion(*h5py.version.version_tuple[:3]) +HDF5_HEX_VERSION = calc_hexversion(*h5py.version.hdf5_version_tuple[:3]) + +HDF5_SWMR_VERSION = calc_hexversion(*h5py.get_config().swmr_min_hdf5_version[:3]) +HAS_SWMR = HDF5_HEX_VERSION >= HDF5_SWMR_VERSION + +HAS_TRACK_ORDER = H5PY_HEX_VERSION >= calc_hexversion(2, 9, 0) + +if h5py.version.hdf5_version_tuple[:2] == (1, 10): + HDF5_HAS_LOCKING_ARGUMENT = HDF5_HEX_VERSION >= calc_hexversion(1, 10, 7) +else: + HDF5_HAS_LOCKING_ARGUMENT = HDF5_HEX_VERSION >= calc_hexversion(1, 12, 1) +H5PY_HAS_LOCKING_ARGUMENT = H5PY_HEX_VERSION >= calc_hexversion(3, 5, 0) +HAS_LOCKING_ARGUMENT = HDF5_HAS_LOCKING_ARGUMENT & H5PY_HAS_LOCKING_ARGUMENT + +LATEST_LIBVER_IS_V108 = HDF5_HEX_VERSION < calc_hexversion(1, 10, 0) + + +def _libver_low_bound_is_v108(libver) -> bool: + if libver is None: + return True + if LATEST_LIBVER_IS_V108: + return True + if isinstance(libver, str): + low = libver + else: + low = libver[0] + if low == "latest": + return False + return low == "v108" + + +def _hdf5_file_locking(mode="r", locking=None, swmr=None, libver=None, **_): + """Concurrent access by disabling file locking is not supported + in these cases: + + * mode != "r": causes file corruption + * SWMR: does not work + * libver > v108 and file already locked: does not work + * windows and HDF5_HAS_LOCKING_ARGUMENT and file already locked: does not work + + :param str or None mode: read-only by default + :param bool or None locking: by default it is disabled for `mode='r'` + and `swmr=False` and enabled for all + other modes. 
+ :param bool or None swmr: try both modes when `mode='r'` and `swmr=None` + :param None or str or tuple libver: + :returns bool: + """ + if locking is None: + locking = bool(mode != "r" or swmr) + if not locking: + if mode != "r": + raise ValueError("Locking is mandatory for HDF5 writing") + if swmr: + raise ValueError("Locking is mandatory for HDF5 SWMR mode") + if IS_WINDOWS and HDF5_HAS_LOCKING_ARGUMENT: + _logger.debug( + "Non-locking readers will fail when a writer has already locked the HDF5 file (this restriction applies to libhdf5 >= 1.12.1 or libhdf5 >= 1.10.7 on Windows)" + ) + if not _libver_low_bound_is_v108(libver): + _logger.debug( + "Non-locking readers will fail when a writer has already locked the HDF5 file (this restriction applies to libver >= v110)" + ) + return locking + + +def _is_h5py_exception(e): + """ + :param BaseException e: + :returns bool: + """ + for frame in traceback.walk_tb(e.__traceback__): + if frame[0].f_locals.get("__package__", None) == "h5py": + return True + return False + + +def _retry_h5py_error(e): + """ + :param BaseException e: + :returns bool: + """ + if _is_h5py_exception(e): + if isinstance(e, (OSError, RuntimeError)): + return True + elif isinstance(e, KeyError): + # For example this needs to be retried: + # KeyError: 'Unable to open object (bad object header version number)' + return "Unable to open object" in str(e) + elif isinstance(e, retry_mod.RetryError): + return True + return False + + +def retry(**kw): + r"""Decorator for a method that needs to be executed until it not longer + fails on HDF5 IO. Mainly used for reading an HDF5 file that is being + written. + + :param \**kw: see `silx.utils.retry` + """ + kw.setdefault("retry_on_error", _retry_h5py_error) + return retry_mod.retry(**kw) + + +def retry_contextmanager(**kw): + r"""Decorator to make a context manager from a method that needs to be + entered until it not longer fails on HDF5 IO. Mainly used for reading + an HDF5 file that is being written. 
+ + :param \**kw: see `silx.utils.retry_contextmanager` + """ + kw.setdefault("retry_on_error", _retry_h5py_error) + return retry_mod.retry_contextmanager(**kw) + + +def retry_in_subprocess(**kw): + r"""Same as `retry` but it also retries segmentation faults. + + On Window you cannot use this decorator with the "@" syntax: + + .. code-block:: python + + def _method(*args, **kw): + ... + + method = retry_in_subprocess()(_method) + + :param \**kw: see `silx.utils.retry_in_subprocess` + """ + kw.setdefault("retry_on_error", _retry_h5py_error) + return retry_mod.retry_in_subprocess(**kw) + + +def group_has_end_time(h5item): + """Returns True when the HDF5 item is a Group with an "end_time" + dataset. A reader can use this as an indication that the Group + has been fully written (at least if the writer supports this). + + :param Union[h5py.Group,h5py.Dataset] h5item: + :returns bool: + """ + if isinstance(h5item, h5py.Group): + return "end_time" in h5item + else: + return False + + +@retry_contextmanager() +def open_item(filename, name, retry_invalid=False, validate=None, **open_options): + r"""Yield an HDF5 dataset or group (retry until it can be instantiated). + + :param str filename: + :param bool retry_invalid: retry when item is missing or not valid + :param callable or None validate: + :param \**open_options: see `File.__init__` + :yields Dataset, Group or None: + """ + with File(filename, **open_options) as h5file: + try: + item = h5file[name] + except KeyError as e: + if "doesn't exist" in str(e): + if retry_invalid: + raise retry_mod.RetryError + else: + item = None + else: + raise + if callable(validate) and item is not None: + if not validate(item): + if retry_invalid: + raise retry_mod.RetryError + else: + item = None + yield item + + +def _top_level_names(filename, include_only=group_has_end_time, **open_options): + r"""Return all valid top-level HDF5 names. 
+ + :param str filename: + :param callable or None include_only: + :param \**open_options: see `File.__init__` + :returns list(str): + """ + with File(filename, **open_options) as h5file: + try: + if callable(include_only): + return [name for name in h5file["/"] if include_only(h5file[name])] + else: + return list(h5file["/"]) + except KeyError: + raise retry_mod.RetryError + + +top_level_names = retry()(_top_level_names) +safe_top_level_names = retry_in_subprocess()(_top_level_names) + + +class Hdf5FileLockingManager: + """Manage HDF5 file locking in the current process through the HDF5_USE_FILE_LOCKING + environment variable. + """ + + def __init__(self) -> None: + self._hdf5_file_locking = None + self._nfiles_open = 0 + + def opened(self): + self._add_nopen(1) + + def closed(self): + self._add_nopen(-1) + if not self._nfiles_open: + self._restore_locking_env() + + def set_locking(self, locking): + if self._nfiles_open: + self._check_locking_env(locking) + else: + self._set_locking_env(locking) + + def _add_nopen(self, v): + self._nfiles_open = max(self._nfiles_open + v, 0) + + def _set_locking_env(self, enable): + self._backup_locking_env() + if enable: + os.environ["HDF5_USE_FILE_LOCKING"] = "TRUE" + elif enable is None: + try: + del os.environ["HDF5_USE_FILE_LOCKING"] + except KeyError: + pass + else: + os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" + + def _get_locking_env(self): + v = os.environ.get("HDF5_USE_FILE_LOCKING") + if v == "TRUE": + return True + elif v is None: + return None + else: + return False + + def _check_locking_env(self, enable): + if enable != self._get_locking_env(): + if enable: + raise RuntimeError( + "Close all HDF5 files before enabling HDF5 file locking" + ) + else: + raise RuntimeError( + "Close all HDF5 files before disabling HDF5 file locking" + ) + + def _backup_locking_env(self): + v = os.environ.get("HDF5_USE_FILE_LOCKING") + if v is None: + self._hdf5_file_locking = None + else: + self._hdf5_file_locking = v == "TRUE" + + 
def _restore_locking_env(self): + self._set_locking_env(self._hdf5_file_locking) + self._hdf5_file_locking = None + + +class File(h5py.File): + """Takes care of HDF5 file locking and SWMR mode without the need + to handle those explicitely. + + When file locking is managed through the HDF5_USE_FILE_LOCKING environment + variable, you cannot open different files simultaneously with different modes. + """ + + _SWMR_LIBVER = "latest" + + if HAS_LOCKING_ARGUMENT: + _LOCKING_MGR = None + else: + _LOCKING_MGR = Hdf5FileLockingManager() + + def __init__( + self, + filename, + mode=None, + locking=None, + enable_file_locking=None, + swmr=None, + libver=None, + **kwargs, + ): + r"""The arguments `locking` and `swmr` should not be + specified explicitly for normal use cases. + + :param str filename: + :param str or None mode: read-only by default + :param bool or None locking: by default it is disabled for `mode='r'` + and `swmr=False` and enabled for all + other modes. + :param bool or None enable_file_locking: deprecated + :param bool or None swmr: try both modes when `mode='r'` and `swmr=None` + :param None or str or tuple libver: + :param \**kwargs: see `h5py.File.__init__` + """ + # File locking behavior has changed in recent versions of libhdf5 + if HDF5_HAS_LOCKING_ARGUMENT != H5PY_HAS_LOCKING_ARGUMENT: + _logger.critical( + "The version of libhdf5 ({}) used by h5py ({}) is not supported: " + "Do not expect file locking to work.".format( + h5py.version.hdf5_version, h5py.version.version + ) + ) + + if mode is None: + mode = "r" + elif mode not in ("r", "w", "w-", "x", "a", "r+"): + raise ValueError("invalid mode {}".format(mode)) + if not HAS_SWMR: + swmr = False + if swmr and libver is None: + libver = self._SWMR_LIBVER + + if enable_file_locking is not None: + deprecated_warning( + type_="argument", + name="enable_file_locking", + replacement="locking", + since_version="1.0", + ) + if locking is None: + locking = enable_file_locking + locking = _hdf5_file_locking( + 
mode=mode, locking=locking, swmr=swmr, libver=libver + ) + if self._LOCKING_MGR is None: + kwargs.setdefault("locking", locking) + else: + self._LOCKING_MGR.set_locking(locking) + + if HAS_TRACK_ORDER: + kwargs.setdefault("track_order", True) + try: + super().__init__(filename, mode=mode, swmr=swmr, libver=libver, **kwargs) + except OSError as e: + # wlock wSWMR rlock rSWMR OSError: Unable to open file (...) + # 1 TRUE FALSE FALSE FALSE - + # 2 TRUE FALSE FALSE TRUE - + # 3 TRUE FALSE TRUE FALSE unable to lock file, errno = 11, error message = 'Resource temporarily unavailable' + # 4 TRUE FALSE TRUE TRUE unable to lock file, errno = 11, error message = 'Resource temporarily unavailable' + # 5 TRUE TRUE FALSE FALSE file is already open for write (may use <h5clear file> to clear file consistency flags) + # 6 TRUE TRUE FALSE TRUE - + # 7 TRUE TRUE TRUE FALSE file is already open for write (may use <h5clear file> to clear file consistency flags) + # 8 TRUE TRUE TRUE TRUE - + if ( + mode == "r" + and swmr is None + and "file is already open for write" in str(e) + ): + # Try reading in SWMR mode (situation 5 and 7) + swmr = True + if libver is None: + libver = self._SWMR_LIBVER + super().__init__( + filename, mode=mode, swmr=swmr, libver=libver, **kwargs + ) + else: + raise + else: + self._file_open_callback() + try: + if mode != "r" and swmr: + # Try setting writer in SWMR mode + self.swmr_mode = True + except Exception: + self.close() + raise + + def close(self): + super().close() + self._file_close_callback() + + def _file_open_callback(self): + if self._LOCKING_MGR is not None: + self._LOCKING_MGR.opened() + + def _file_close_callback(self): + if self._LOCKING_MGR is not None: + self._LOCKING_MGR.closed() diff --git a/src/silx/io/nxdata/__init__.py b/src/silx/io/nxdata/__init__.py new file mode 100644 index 0000000..5bfa442 --- /dev/null +++ b/src/silx/io/nxdata/__init__.py @@ -0,0 +1,66 @@ +# coding: utf-8 +# 
/*########################################################################## +# +# Copyright (c) 2018 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +""" +:mod:`nxdata`: NXdata parsing and validation +-------------------------------------------- + +To parse an existing NXdata group, use :class:`NXdata`. + +Following functions help you check the validity of a existing NXdata group: + - :func:`is_valid_nxdata` + - :func:`is_NXentry_with_default_NXdata` + - :func:`is_NXroot_with_default_NXdata` + +To help you write a NXdata group, you can use :func:`save_NXdata`. + +.. currentmodule:: silx.io.nxdata + +Classes ++++++++ + +.. autoclass:: NXdata + :members: + + +Functions ++++++++++ + +.. autofunction:: get_default + +.. autofunction:: is_valid_nxdata + +.. autofunction:: is_group_with_default_NXdata + +.. 
autofunction:: is_NXentry_with_default_NXdata + +.. autofunction:: is_NXroot_with_default_NXdata + +.. autofunction:: save_NXdata + +""" +from .parse import NXdata, get_default, is_valid_nxdata, InvalidNXdataError, \ + is_NXentry_with_default_NXdata, is_NXroot_with_default_NXdata, is_group_with_default_NXdata +from ._utils import get_attr_as_unicode, get_attr_as_string, nxdata_logger +from .write import save_NXdata diff --git a/src/silx/io/nxdata/_utils.py b/src/silx/io/nxdata/_utils.py new file mode 100644 index 0000000..12318f1 --- /dev/null +++ b/src/silx/io/nxdata/_utils.py @@ -0,0 +1,183 @@ +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2017-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ###########################################################################*/ +"""Utility functions used by NXdata validation and parsing.""" + +import copy +import logging + +import numpy + +from silx.io import is_dataset +from silx.utils.deprecation import deprecated + + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "17/04/2018" + + +nxdata_logger = logging.getLogger("silx.io.nxdata") + + +INTERPDIM = {"scalar": 0, + "spectrum": 1, + "image": 2, + "rgba-image": 3, # "hsla-image": 3, "cmyk-image": 3, # TODO + "vertex": 1} # 3D scatter: 1D signal + 3 axes (x, y, z) of same legth +"""Number of signal dimensions associated to each possible @interpretation +attribute. +""" + + +@deprecated(since_version="0.8.0", replacement="get_attr_as_unicode") +def get_attr_as_string(*args, **kwargs): + return get_attr_as_unicode(*args, **kwargs) + + +def get_attr_as_unicode(item, attr_name, default=None): + """Return item.attrs[attr_name] as unicode or as a + list of unicode. + + Numpy arrays of strings or bytes returned by h5py are converted to + lists of unicode. + + :param item: Group or dataset + :param attr_name: Attribute name + :param default: Value to be returned if attribute is not found. 
+ :return: item.attrs[attr_name] + """ + attr = item.attrs.get(attr_name, default) + + if isinstance(attr, bytes): + # byte-string + return attr.decode("utf-8") + elif isinstance(attr, numpy.ndarray) and not attr.shape: + if isinstance(attr[()], bytes): + # byte string as ndarray scalar + return attr[()].decode("utf-8") + else: + # other scalar, possibly unicode + return attr[()] + elif isinstance(attr, numpy.ndarray) and len(attr.shape): + if hasattr(attr[0], "decode"): + # array of byte-strings + return [element.decode("utf-8") for element in attr] + else: + # other array, most likely unicode objects + return [element for element in attr] + else: + return copy.deepcopy(attr) + + +def get_uncertainties_names(group, signal_name): + # Test consistency of @uncertainties + uncertainties_names = get_attr_as_unicode(group, "uncertainties") + if uncertainties_names is None: + uncertainties_names = get_attr_as_unicode(group[signal_name], "uncertainties") + if isinstance(uncertainties_names, str): + uncertainties_names = [uncertainties_names] + return uncertainties_names + + +def get_signal_name(group): + """Return the name of the (main) signal in a NXdata group. + Return None if this info is missing (invalid NXdata). + + """ + signal_name = get_attr_as_unicode(group, "signal", default=None) + if signal_name is None: + nxdata_logger.info("NXdata group %s does not define a signal attr. 
" + "Testing legacy specification.", group.name) + for key in group: + if "signal" in group[key].attrs: + signal_name = key + signal_attr = group[key].attrs["signal"] + if signal_attr in [1, b"1", u"1"]: + # This is the main (default) signal + break + return signal_name + + +def get_auxiliary_signals_names(group): + """Return list of auxiliary signals names""" + auxiliary_signals_names = get_attr_as_unicode(group, "auxiliary_signals", + default=[]) + if isinstance(auxiliary_signals_names, (str, bytes)): + auxiliary_signals_names = [auxiliary_signals_names] + return auxiliary_signals_names + + +def validate_auxiliary_signals(group, signal_name, auxiliary_signals_names): + """Check data dimensionality and size. Return False if invalid.""" + issues = [] + for asn in auxiliary_signals_names: + if asn not in group or not is_dataset(group[asn]): + issues.append( + "Cannot find auxiliary signal dataset '%s'" % asn) + elif group[signal_name].shape != group[asn].shape: + issues.append("Auxiliary signal dataset '%s' does not" % asn + + " have the same shape as the main signal.") + return issues + + +def validate_number_of_axes(group, signal_name, num_axes): + issues = [] + ndims = len(group[signal_name].shape) + if 1 < ndims < num_axes: + # ndim = 1 with several axes could be a scatter + issues.append( + "More @axes defined than there are " + + "signal dimensions: " + + "%d axes, %d dimensions." 
% (num_axes, ndims)) + + # case of less axes than dimensions: number of axes must match + # dimensionality defined by @interpretation + elif ndims > num_axes: + interpretation = get_attr_as_unicode(group[signal_name], "interpretation") + if interpretation is None: + interpretation = get_attr_as_unicode(group, "interpretation") + if interpretation is None: + issues.append("No @interpretation and not enough" + + " @axes defined.") + + elif interpretation not in INTERPDIM: + issues.append("Unrecognized @interpretation=" + interpretation + + " for data with wrong number of defined @axes.") + elif interpretation == "rgba-image": + if ndims != 3 or group[signal_name].shape[-1] not in [3, 4]: + issues.append( + "Inconsistent RGBA Image. Expected 3 dimensions with " + + "last one of length 3 or 4. Got ndim=%d " % ndims + + "with last dimension of length %d." % group[signal_name].shape[-1]) + if num_axes != 2: + issues.append( + "Inconsistent number of axes for RGBA Image. Expected " + "3, but got %d." % ndims) + + elif num_axes != INTERPDIM[interpretation]: + issues.append( + "%d-D signal with @interpretation=%s " % (ndims, interpretation) + + "must define %d or %d axes." 
% (ndims, INTERPDIM[interpretation])) + return issues diff --git a/src/silx/io/nxdata/parse.py b/src/silx/io/nxdata/parse.py new file mode 100644 index 0000000..d00f65b --- /dev/null +++ b/src/silx/io/nxdata/parse.py @@ -0,0 +1,1004 @@ +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2017-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""This package provides a collection of functions to work with h5py-like +groups following the NeXus *NXdata* specification. + +See http://download.nexusformat.org/sphinx/classes/base_classes/NXdata.html + +The main class is :class:`NXdata`. +You can also fetch the default NXdata in a NXroot or a NXentry with function +:func:`get_default`. 
+ + +Other public functions: + + - :func:`is_valid_nxdata` + - :func:`is_NXroot_with_default_NXdata` + - :func:`is_NXentry_with_default_NXdata` + - :func:`is_group_with_default_NXdata` + +""" + +import json +import numpy + +from silx.io.utils import is_group, is_file, is_dataset, h5py_read_dataset + +from ._utils import get_attr_as_unicode, INTERPDIM, nxdata_logger, \ + get_uncertainties_names, get_signal_name, \ + get_auxiliary_signals_names, validate_auxiliary_signals, validate_number_of_axes + + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "24/03/2020" + + +class InvalidNXdataError(Exception): + pass + + +class _SilxStyle(object): + """NXdata@SILX_style parser. + + :param NXdata nxdata: + NXdata description for which to extract silx_style information. + """ + + def __init__(self, nxdata): + naxes = len(nxdata.axes) + self._axes_scale_types = [None] * naxes + self._signal_scale_type = None + + stylestr = get_attr_as_unicode(nxdata.group, "SILX_style") + if stylestr is None: + return + + try: + style = json.loads(stylestr) + except json.JSONDecodeError: + nxdata_logger.error( + "Ignoring SILX_style, cannot parse: %s", stylestr) + return + + if not isinstance(style, dict): + nxdata_logger.error( + "Ignoring SILX_style, cannot parse: %s", stylestr) + + if 'axes_scale_types' in style: + axes_scale_types = style['axes_scale_types'] + + if isinstance(axes_scale_types, str): + # Convert single argument to list + axes_scale_types = [axes_scale_types] + + if not isinstance(axes_scale_types, list): + nxdata_logger.error( + "Ignoring SILX_style:axes_scale_types, not a list") + else: + for scale_type in axes_scale_types: + if scale_type not in ('linear', 'log'): + nxdata_logger.error( + "Ignoring SILX_style:axes_scale_types, invalid value: %s", str(scale_type)) + break + else: # All values are valid + if len(axes_scale_types) > naxes: + nxdata_logger.error( + "Clipping SILX_style:axes_scale_types, too many values") + axes_scale_types = 
axes_scale_types[:naxes] + elif len(axes_scale_types) < naxes: + # Extend axes_scale_types with None to match number of axes + axes_scale_types = [None] * (naxes - len(axes_scale_types)) + axes_scale_types + self._axes_scale_types = tuple(axes_scale_types) + + if 'signal_scale_type' in style: + scale_type = style['signal_scale_type'] + if scale_type not in ('linear', 'log'): + nxdata_logger.error( + "Ignoring SILX_style:signal_scale_type, invalid value: %s", str(scale_type)) + else: + self._signal_scale_type = scale_type + + axes_scale_types = property( + lambda self: self._axes_scale_types, + doc="Tuple of NXdata axes scale types (None, 'linear' or 'log'). List[str]") + + signal_scale_type = property( + lambda self: self._signal_scale_type, + doc="NXdata signal scale type (None, 'linear' or 'log'). str") + + +class NXdata(object): + """NXdata parser. + + .. note:: + + Before attempting to access any attribute or property, + you should check that :attr:`is_valid` is *True*. + + :param group: h5py-like group following the NeXus *NXdata* specification. + :param boolean validate: Set this parameter to *False* to skip the initial + validation. This option is provided for optimisation purposes, for cases + where :meth:`silx.io.nxdata.is_valid_nxdata` has already been called + prior to instantiating this :class:`NXdata`. + """ + def __init__(self, group, validate=True): + super(NXdata, self).__init__() + self._plot_style = None + + self.group = group + """h5py-like group object with @NX_class=NXdata. + """ + + self.issues = [] + """List of error messages for malformed NXdata.""" + + if validate: + self._validate() + self.is_valid = not self.issues + """Validity status for this NXdata. + If False, all properties and attributes will be None. + """ + + self._is_scatter = None + self._axes = None + + self.signal = None + """Main signal dataset in this NXdata group. 
        In case more than one signal is present in this group,
        the other ones can be found in :attr:`auxiliary_signals`.
        """

        self.signal_name = None
        """Signal long name, as specified in the @long_name attribute of the
        signal dataset. If not specified, the dataset name is used."""

        # Dimensionality flags; stay None when the group is not valid NXdata.
        self.signal_ndim = None
        self.signal_is_0d = None
        self.signal_is_1d = None
        self.signal_is_2d = None
        self.signal_is_3d = None

        self.axes_names = None
        """List of axes names in a NXdata group.

        This attribute is similar to :attr:`axes_dataset_names` except that
        if an axis dataset has a "@long_name" attribute, it will be used
        instead of the dataset name.
        """

        if not self.is_valid:
            nxdata_logger.debug("%s", self.issues)
        else:
            self.signal = self.group[self.signal_dataset_name]
            self.signal_name = get_attr_as_unicode(self.signal, "long_name")

            if self.signal_name is None:
                self.signal_name = self.signal_dataset_name

            # ndim will be available in very recent h5py versions only
            self.signal_ndim = getattr(self.signal, "ndim",
                                       len(self.signal.shape))

            self.signal_is_0d = self.signal_ndim == 0
            self.signal_is_1d = self.signal_ndim == 1
            self.signal_is_2d = self.signal_ndim == 2
            self.signal_is_3d = self.signal_ndim == 3

            self.axes_names = []
            # check if axis dataset defines @long_name
            for _, dsname in enumerate(self.axes_dataset_names):
                if dsname is not None and "long_name" in self.group[dsname].attrs:
                    self.axes_names.append(get_attr_as_unicode(self.group[dsname], "long_name"))
                else:
                    self.axes_names.append(dsname)

            # excludes scatters
            self.signal_is_1d = self.signal_is_1d and len(self.axes) <= 1  # excludes n-D scatters

            self._plot_style = _SilxStyle(self)

    def _validate(self):
        """Fill :attr:`issues` with error messages for each error found.

        Checks, in order: @NX_class, presence of a signal dataset,
        auxiliary signals, @axes consistency (number of axes, @uncertainties
        field count, per-axis shape/length), and the shape of the optional
        signal errors dataset. Appends human-readable messages rather than
        raising, except for a non-group input which raises TypeError.
        """
        if not is_group(self.group):
            raise TypeError("group must be a h5py-like group")
        if get_attr_as_unicode(self.group, "NX_class") != "NXdata":
            self.issues.append("Group has no attribute @NX_class='NXdata'")
            return

        signal_name = get_signal_name(self.group)
        if signal_name is None:
            self.issues.append("No @signal attribute on the NXdata group, "
                               "and no dataset with a @signal=1 attr found")
            # very difficult to do more consistency tests without signal
            return

        elif signal_name not in self.group or not is_dataset(self.group[signal_name]):
            self.issues.append("Cannot find signal dataset '%s'" % signal_name)
            return

        auxiliary_signals_names = get_auxiliary_signals_names(self.group)
        self.issues += validate_auxiliary_signals(self.group,
                                                 signal_name,
                                                 auxiliary_signals_names)

        axes_names = get_attr_as_unicode(self.group, "axes")
        if axes_names is None:
            # try @axes on signal dataset (older NXdata specification)
            axes_names = get_attr_as_unicode(self.group[signal_name], "axes")
            if axes_names is not None:
                # we expect a comma separated string
                if hasattr(axes_names, "split"):
                    axes_names = axes_names.split(":")

        if isinstance(axes_names, (str, bytes)):
            axes_names = [axes_names]

        if axes_names:
            self.issues += validate_number_of_axes(self.group, signal_name,
                                                   num_axes=len(axes_names))

            # Test consistency of @uncertainties
            uncertainties_names = get_uncertainties_names(self.group, signal_name)
            if uncertainties_names is not None:
                if len(uncertainties_names) != len(axes_names):
                    if len(uncertainties_names) < len(axes_names):
                        # ignore the field to avoid index error in the axes loop
                        uncertainties_names = None
                        self.issues.append("@uncertainties does not define the same " +
                                           "number of fields than @axes. Field ignored")
                    else:
                        self.issues.append("@uncertainties does not define the same " +
                                           "number of fields than @axes")

            # Test individual axes
            is_scatter = True   # true if all axes have the same size as the signal
            signal_size = 1
            for dim in self.group[signal_name].shape:
                signal_size *= dim
            polynomial_axes_names = []
            for i, axis_name in enumerate(axes_names):

                if axis_name == ".":
                    # "." means: this signal dimension has no axis
                    continue
                if axis_name not in self.group or not is_dataset(self.group[axis_name]):
                    self.issues.append("Could not find axis dataset '%s'" % axis_name)
                    continue

                axis_size = 1
                for dim in self.group[axis_name].shape:
                    axis_size *= dim

                if len(self.group[axis_name].shape) != 1:
                    # I don't know how to interpret n-D axes
                    self.issues.append("Axis %s is not 1D" % axis_name)
                    continue
                else:
                    # for a 1-d axis, clip to the declared good range
                    fg_idx = self.group[axis_name].attrs.get("first_good", 0)
                    lg_idx = self.group[axis_name].attrs.get("last_good", len(self.group[axis_name]) - 1)
                    axis_len = lg_idx + 1 - fg_idx

                if axis_len != signal_size:
                    # lengths 1 and 2 are accepted as polynomial calibrations
                    # (constant, or a + b*x)
                    if axis_len not in self.group[signal_name].shape + (1, 2):
                        self.issues.append(
                                "Axis %s number of elements does not " % axis_name +
                                "correspond to the length of any signal dimension,"
                                " it does not appear to be a constant or a linear calibration," +
                                " and this does not seem to be a scatter plot.")
                        continue
                    elif axis_len in (1, 2):
                        polynomial_axes_names.append(axis_name)
                    is_scatter = False
                else:
                    if not is_scatter:
                        self.issues.append(
                                "Axis %s number of elements is equal " % axis_name +
                                "to the length of the signal, but this does not seem" +
                                " to be a scatter (other axes have different sizes)")
                        continue

                # Test individual uncertainties
                errors_name = axis_name + "_errors"
                if errors_name not in self.group and uncertainties_names is not None:
                    errors_name = uncertainties_names[i]
                if errors_name in self.group and axis_name not in polynomial_axes_names:
                    if self.group[errors_name].shape != self.group[axis_name].shape:
                        self.issues.append(
                                "Errors '%s' does not have the same " % errors_name +
                                "dimensions as axis '%s'." % axis_name)

        # test dimensions of errors associated with signal

        signal_errors = signal_name + "_errors"
        if "errors" in self.group and is_dataset(self.group["errors"]):
            errors = "errors"
        elif signal_errors in self.group and is_dataset(self.group[signal_errors]):
            errors = signal_errors
        else:
            errors = None
        if errors:
            if self.group[errors].shape != self.group[signal_name].shape:
                # In principle just the same size should be enough but
                # NeXus documentation imposes to have the same shape
                self.issues.append(
                        "Dataset containing standard deviations must " +
                        "have the same dimensions as the signal.")

    @property
    def signal_dataset_name(self):
        """Name of the main signal dataset.

        Taken from the group's *@signal* attribute, falling back to the
        first dataset carrying *@signal=1* (deprecated NXdata spec).

        :raise InvalidNXdataError: if the group is not valid NXdata
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")
        signal_dataset_name = get_attr_as_unicode(self.group, "signal")
        if signal_dataset_name is None:
            # find a dataset with @signal == 1
            for dsname in self.group:
                signal_attr = self.group[dsname].attrs.get("signal")
                if signal_attr in [1, b"1", u"1"]:
                    # This is the main (default) signal
                    signal_dataset_name = dsname
                    break
        assert signal_dataset_name is not None
        return signal_dataset_name

    @property
    def auxiliary_signals_dataset_names(self):
        """Sorted list of names of the auxiliary signals datasets.

        These are the names provided by the *@auxiliary_signals* attribute
        on the NXdata group.

        In case the NXdata group does not specify a *@signal* attribute
        but has a dataset with an attribute *@signal=1*,
        we look for datasets with attributes *@signal=2, @signal=3...*
        (deprecated NXdata specification)."""
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")
        signal_dataset_name = get_attr_as_unicode(self.group, "signal")
        if signal_dataset_name is not None:
            auxiliary_signals_names = get_attr_as_unicode(self.group, "auxiliary_signals")
            if auxiliary_signals_names is not None:
                if not isinstance(auxiliary_signals_names,
                                  (tuple, list, numpy.ndarray)):
                    # tolerate a single string, but coerce into a list
                    return [auxiliary_signals_names]
                return list(auxiliary_signals_names)
            return []

        # try old spec, @signal=1 (2, 3...) on dataset
        numbered_names = []
        for dsname in self.group:
            if dsname == self.signal_dataset_name:
                # main signal, not auxiliary
                continue
            ds = self.group[dsname]
            signal_attr = ds.attrs.get("signal")
            if signal_attr is not None and not is_dataset(ds):
                nxdata_logger.warning("Item %s with @signal=%s is not a dataset (%s)",
                                      dsname, signal_attr, type(ds))
                continue
            if signal_attr is not None:
                try:
                    signal_number = int(signal_attr)
                except (ValueError, TypeError):
                    nxdata_logger.warning("Could not parse attr @signal=%s on "
                                          "dataset %s as an int",
                                          signal_attr, dsname)
                    continue
                numbered_names.append((signal_number, dsname))
        # sort by the @signal number, return only the dataset names
        return [a[1] for a in sorted(numbered_names)]

    @property
    def auxiliary_signals_names(self):
        """List of names of the auxiliary signals.

        Similar to :attr:`auxiliary_signals_dataset_names`, but the @long_name
        is used when this attribute is present, instead of the dataset name.
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        signal_names = []
        for asdn in self.auxiliary_signals_dataset_names:
            if "long_name" in self.group[asdn].attrs:
                signal_names.append(self.group[asdn].attrs["long_name"])
            else:
                signal_names.append(asdn)
        return signal_names

    @property
    def auxiliary_signals(self):
        """List of all auxiliary signal datasets."""
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        return [self.group[dsname] for dsname in self.auxiliary_signals_dataset_names]

    @property
    def interpretation(self):
        """*@interpretation* attribute associated with the *signal*
        dataset of the NXdata group. ``None`` if no interpretation
        attribute is present.

        The *interpretation* attribute provides information about the last
        dimensions of the signal. The allowed values are:

             - *"scalar"*: 0-D data to be plotted
             - *"spectrum"*: 1-D data to be plotted
             - *"image"*: 2-D data to be plotted
             - *"vertex"*: 3-D data to be plotted

        For example, a 3-D signal with interpretation *"spectrum"* should be
        considered to be a 2-D array of 1-D data. A 3-D signal with
        interpretation *"image"* should be interpreted as a 1-D array (a list)
        of 2-D images. An n-D array with interpretation *"image"* should be
        interpreted as an (n-2)-D array of images. A warning message is logged
        if the returned interpretation is not one of the allowed values, but
        no error is raised and the unknown interpretation is returned anyway.
+ """ + if not self.is_valid: + raise InvalidNXdataError("Unable to parse invalid NXdata") + + allowed_interpretations = [None, "scaler", "scalar", "spectrum", "image", + "rgba-image", # "hsla-image", "cmyk-image" + "vertex"] + + interpretation = get_attr_as_unicode(self.signal, "interpretation") + if interpretation is None: + interpretation = get_attr_as_unicode(self.group, "interpretation") + + if interpretation not in allowed_interpretations: + nxdata_logger.warning("Interpretation %s is not valid." % interpretation + + " Valid values: " + ", ".join(str(s) for s in allowed_interpretations)) + return interpretation + + @property + def axes(self): + """List of the axes datasets. + + The list typically has as many elements as there are dimensions in the + signal dataset, the exception being scatter plots which use a 1D + signal and multiple 1D axes of the same size. + + If an axis dataset applies to several dimensions of the signal, it + will be repeated in the list. + + If a dimension of the signal has no dimension scale, `None` is + inserted in its position in the list. + + .. note:: + + The *@axes* attribute should define as many entries as there + are dimensions in the signal, to avoid any ambiguity. + If this is not the case, this implementation relies on the existence + of an *@interpretation* (*spectrum* or *image*) attribute in the + *signal* dataset. + + .. note:: + + If an axis dataset defines attributes @first_good or @last_good, + the output will be a numpy array resulting from slicing that + axis (*axis[first_good:last_good + 1]*). 
+ + :rtype: List[Dataset or 1D array or None] + """ + if not self.is_valid: + raise InvalidNXdataError("Unable to parse invalid NXdata") + + if self._axes is not None: + # use cache + return self._axes + axes = [] + for axis_name in self.axes_dataset_names: + if axis_name is None: + axes.append(None) + else: + axes.append(self.group[axis_name]) + + # keep only good range of axis data + for i, axis in enumerate(axes): + if axis is None: + continue + if "first_good" not in axis.attrs and "last_good" not in axis.attrs: + continue + fg_idx = axis.attrs.get("first_good", 0) + lg_idx = axis.attrs.get("last_good", len(axis) - 1) + axes[i] = axis[fg_idx:lg_idx + 1] + + self._axes = axes + return self._axes + + @property + def axes_dataset_names(self): + """List of axes dataset names. + + If an axis dataset applies to several dimensions of the signal, its + name will be repeated in the list. + + If a dimension of the signal has no dimension scale (i.e. there is a + "." in that position in the *@axes* array), `None` is inserted in the + output list in its position. 
+ """ + if not self.is_valid: + raise InvalidNXdataError("Unable to parse invalid NXdata") + + numbered_names = [] # used in case of @axis=0 (old spec) + axes_dataset_names = get_attr_as_unicode(self.group, "axes") + if axes_dataset_names is None: + # try @axes on signal dataset (older NXdata specification) + axes_dataset_names = get_attr_as_unicode(self.signal, "axes") + if axes_dataset_names is not None: + # we expect a comma separated string + if hasattr(axes_dataset_names, "split"): + axes_dataset_names = axes_dataset_names.split(":") + else: + # try @axis on the individual datasets (oldest NXdata specification) + for dsname in self.group: + if not is_dataset(self.group[dsname]): + continue + axis_attr = self.group[dsname].attrs.get("axis") + if axis_attr is not None: + try: + axis_num = int(axis_attr) + except (ValueError, TypeError): + nxdata_logger.warning("Could not interpret attr @axis as" + "int on dataset %s", dsname) + continue + numbered_names.append((axis_num, dsname)) + + ndims = len(self.signal.shape) + if axes_dataset_names is None: + if numbered_names: + axes_dataset_names = [] + numbers = [a[0] for a in numbered_names] + names = [a[1] for a in numbered_names] + for i in range(ndims): + if i in numbers: + axes_dataset_names.append(names[numbers.index(i)]) + else: + axes_dataset_names.append(None) + return axes_dataset_names + else: + return [None] * ndims + + if isinstance(axes_dataset_names, (str, bytes)): + axes_dataset_names = [axes_dataset_names] + + for i, axis_name in enumerate(axes_dataset_names): + if hasattr(axis_name, "decode"): + axis_name = axis_name.decode() + if axis_name == ".": + axes_dataset_names[i] = None + + if len(axes_dataset_names) != ndims: + if self.is_scatter and ndims == 1: + # case of a 1D signal with arbitrary number of axes + return list(axes_dataset_names) + if self.interpretation != "rgba-image": + # @axes may only define 1 or 2 axes if @interpretation=spectrum/image. 
                # Use the existing names for the last few dims, and prepend with Nones.
                assert len(axes_dataset_names) == INTERPDIM[self.interpretation]
                all_dimensions_names = [None] * (ndims - INTERPDIM[self.interpretation])
                for axis_name in axes_dataset_names:
                    all_dimensions_names.append(axis_name)
            else:
                # 2 axes applying to the first two dimensions.
                # The 3rd signal dimension is expected to contain 3(4) RGB(A) values.
                assert len(axes_dataset_names) == 2
                all_dimensions_names = [axn for axn in axes_dataset_names]
                all_dimensions_names.append(None)
            return all_dimensions_names

        return list(axes_dataset_names)

    @property
    def title(self):
        """Plot title. If not found, returns an empty string.

        This attribute does not appear in the NXdata specification, but it is
        implemented in *nexpy* as a dataset named "title" inside the NXdata
        group. This dataset is expected to contain text.

        Because the *nexpy* approach could cause a conflict if the signal
        dataset or an axis dataset happened to be called "title", we also
        support providing the title as an attribute of the NXdata group.
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        title = self.group.get("title")
        data_dataset_names = [self.signal_name] + self.axes_dataset_names
        # only honor a "title" dataset if that name is not already taken by
        # the signal or one of the axes
        if (title is not None and is_dataset(title) and
                "title" not in data_dataset_names):
            return str(h5py_read_dataset(title))

        title = self.group.attrs.get("title")
        if title is None:
            return ""
        return str(title)

    def get_axis_errors(self, axis_name):
        """Return errors (uncertainties) associated with an axis.

        If the axis has attributes @first_good or @last_good, the output
        is trimmed accordingly (a numpy array will be returned rather than a
        dataset).

        :param str axis_name: Name of axis dataset. This dataset **must exist**.
        :return: Dataset with axis errors, or None
        :raise KeyError: if this group does not contain a dataset named axis_name
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        # ensure axis_name is decoded, before comparing it with decoded attributes
        if hasattr(axis_name, "decode"):
            axis_name = axis_name.decode("utf-8")
        if axis_name not in self.group:
            # tolerate axis_name given as @long_name
            for item in self.group:
                long_name = get_attr_as_unicode(self.group[item], "long_name")
                if long_name is not None and long_name == axis_name:
                    axis_name = item
                    break

        if axis_name not in self.group:
            raise KeyError("group does not contain a dataset named '%s'" % axis_name)

        len_axis = len(self.group[axis_name])

        fg_idx = self.group[axis_name].attrs.get("first_good", 0)
        lg_idx = self.group[axis_name].attrs.get("last_good", len_axis - 1)

        # case of axisname_errors dataset present
        errors_name = axis_name + "_errors"
        if errors_name in self.group and is_dataset(self.group[errors_name]):
            if fg_idx != 0 or lg_idx != (len_axis - 1):
                # the good-range slice materializes a numpy array
                return self.group[errors_name][fg_idx:lg_idx + 1]
            else:
                return self.group[errors_name]
        # case of uncertainties dataset name provided in @uncertainties
        uncertainties_names = get_attr_as_unicode(self.group, "uncertainties")
        if uncertainties_names is None:
            uncertainties_names = get_attr_as_unicode(self.signal, "uncertainties")
        if isinstance(uncertainties_names, str):
            uncertainties_names = [uncertainties_names]
        if uncertainties_names is not None:
            # take the uncertainty with the same index as the axis in @axes
            axes_ds_names = get_attr_as_unicode(self.group, "axes")
            if axes_ds_names is None:
                axes_ds_names = get_attr_as_unicode(self.signal, "axes")
            if isinstance(axes_ds_names, str):
                axes_ds_names = [axes_ds_names]
            elif isinstance(axes_ds_names, numpy.ndarray):
                # transform numpy.ndarray into list
                axes_ds_names = list(axes_ds_names)
            assert isinstance(axes_ds_names, list)
            if hasattr(axes_ds_names[0], "decode"):
                axes_ds_names = [ax_name.decode("utf-8") for ax_name in axes_ds_names]
            if axis_name not in axes_ds_names:
                raise KeyError("group attr @axes does not mention a dataset " +
                               "named '%s'" % axis_name)
            errors = self.group[uncertainties_names[list(axes_ds_names).index(axis_name)]]
            if fg_idx == 0 and lg_idx == (len_axis - 1):
                return errors   # dataset
            else:
                return errors[fg_idx:lg_idx + 1]   # numpy array
        return None

    @property
    def errors(self):
        """Return errors (uncertainties) associated with the signal values.

        Looks for a dataset named "errors", then "<signal>_errors".

        :return: Dataset with errors, or None
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        # case of signal
        signal_errors = self.signal_dataset_name + "_errors"
        if "errors" in self.group and is_dataset(self.group["errors"]):
            errors = "errors"
        elif signal_errors in self.group and is_dataset(self.group[signal_errors]):
            errors = signal_errors
        else:
            return None
        return self.group[errors]

    @property
    def plot_style(self):
        """Information extracted from the optional SILX_style attribute

        :raises: InvalidNXdataError
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        return self._plot_style

    @property
    def is_scatter(self):
        """True if the signal is 1D and all the axes have the
        same size as the signal."""
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        if self._is_scatter is not None:
            # use cached value
            return self._is_scatter
        if not self.signal_is_1d:
            self._is_scatter = False
        else:
            self._is_scatter = True
            sigsize = 1
            for dim in self.signal.shape:
                sigsize *= dim
            for axis in self.axes:
                if axis is None:
                    continue
                axis_size = 1
                for dim in axis.shape:
                    axis_size *= dim
                self._is_scatter = self._is_scatter and (axis_size == sigsize)
        return self._is_scatter

    @property
    def is_x_y_value_scatter(self):
        """True
        if this is a scatter with a signal and two axes."""
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        return self.is_scatter and len(self.axes) == 2

    # we currently have no widget capable of plotting 4D data
    @property
    def is_unsupported_scatter(self):
        """True if this is a scatter with a signal and more than 2 axes."""
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        return self.is_scatter and len(self.axes) > 2

    @property
    def is_curve(self):
        """This property is True if the signal is 1D or :attr:`interpretation` is
        *"spectrum"*, and there is at most one axis with a consistent length.
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        if self.signal_is_0d or self.interpretation not in [None, "spectrum"]:
            return False
        # the axis, if any, must be of the same length as the last dimension
        # of the signal, or of length 2 (a + b *x scale)
        if self.axes[-1] is not None and len(self.axes[-1]) not in [
                self.signal.shape[-1], 2]:
            return False
        if self.interpretation is None:
            # We no longer test whether x values are monotonic
            # (in the past, in that case, we used to consider it a scatter)
            return self.signal_is_1d
        # everything looks good
        return True

    @property
    def is_image(self):
        """True if the signal is 2D, or 3D with last dimension of length 3 or 4
        and interpretation *rgba-image*, or >2D with interpretation *image*.
        The axes (if any) length must also be consistent with the signal shape.
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        if self.interpretation in ["scalar", "spectrum", "scaler"]:
            return False
        if self.signal_is_0d or self.signal_is_1d:
            return False
        if not self.signal_is_2d and \
                self.interpretation not in ["image", "rgba-image"]:
            return False
        if self.signal_is_3d and self.interpretation == "rgba-image":
            if self.signal.shape[-1] not in [3, 4]:
                return False
            # RGBA: the image plane is the first two dims, last dim is color
            img_axes = self.axes[0:2]
            img_shape = self.signal.shape[0:2]
        else:
            img_axes = self.axes[-2:]
            img_shape = self.signal.shape[-2:]
        for i, axis in enumerate(img_axes):
            # length 2 is accepted as a linear calibration (a + b*x)
            if axis is not None and len(axis) not in [img_shape[i], 2]:
                return False

        return True

    @property
    def is_stack(self):
        """True if the signal is at least 3D and interpretation is not
        "scalar", "spectrum", "image" or "rgba-image".
        The axes length must also be consistent with the last 3 dimensions
        of the signal.
        """
        if not self.is_valid:
            raise InvalidNXdataError("Unable to parse invalid NXdata")

        if self.signal_ndim < 3 or self.interpretation in [
                "scalar", "scaler", "spectrum", "image", "rgba-image"]:
            return False
        stack_shape = self.signal.shape[-3:]
        for i, axis in enumerate(self.axes[-3:]):
            if axis is not None and len(axis) not in [stack_shape[i], 2]:
                return False
        return True

    @property
    def is_volume(self):
        """True if the signal is exactly 3D and interpretation
        "scalar", or nothing.

        The axes length must also be consistent with the 3 dimensions
        of the signal.
+ """ + if not self.is_valid: + raise InvalidNXdataError("Unable to parse invalid NXdata") + + if self.signal_ndim != 3: + return False + if self.interpretation not in [None, "scalar", "scaler"]: + # 'scaler' and 'scalar' for a three dimensional array indicate a scalar field in 3D + return False + volume_shape = self.signal.shape[-3:] + for i, axis in enumerate(self.axes[-3:]): + if axis is not None and len(axis) not in [volume_shape[i], 2]: + return False + return True + + +def is_valid_nxdata(group): # noqa + """Check if a h5py group is a **valid** NX_data group. + + :param group: h5py-like group + :return: True if this NXdata group is valid. + :raise TypeError: if group is not a h5py group, a spech5 group, + or a fabioh5 group + """ + nxd = NXdata(group) + return nxd.is_valid + + +def is_group_with_default_NXdata(group, validate=True): + """Return True if group defines a valid default + NXdata. + + .. note:: + + See https://github.com/silx-kit/silx/issues/2215 + + :param group: h5py-like object. + :param bool validate: Set this to skip the NXdata validation, and only + check the existence of the group. + Parameter provided for optimisation purposes, to avoid double + validation if the validation is already performed separately.""" + default_nxdata_name = group.attrs.get("default") + if default_nxdata_name is None or default_nxdata_name not in group: + return False + + default_nxdata_group = group.get(default_nxdata_name) + + if not is_group(default_nxdata_group): + return False + + if not validate: + return True + else: + return is_valid_nxdata(default_nxdata_group) + + +def is_NXentry_with_default_NXdata(group, validate=True): + """Return True if group is a valid NXentry defining a valid default + NXdata. + + :param group: h5py-like object. + :param bool validate: Set this to skip the NXdata validation, and only + check the existence of the group. 
        Parameter provided for optimisation purposes, to avoid double
        validation if the validation is already performed separately."""
    if not is_group(group):
        return False

    if get_attr_as_unicode(group, "NX_class") != "NXentry":
        return False

    return is_group_with_default_NXdata(group, validate)


def is_NXroot_with_default_NXdata(group, validate=True):
    """Return True if group is a valid NXroot defining a default NXentry
    defining a valid default NXdata.

    .. note::

        A NXroot group cannot directly define a default NXdata. If a
        *@default* argument is present, it must point to a NXentry group.
        This NXentry must define a valid NXdata for this function to return
        True.

    :param group: h5py-like object.
    :param bool validate: Set this to False if you are sure that the target group
        is valid NXdata (i.e. :func:`silx.io.nxdata.is_valid_nxdata(target_group)`
        returns True). Parameter provided for optimisation purposes.
    """
    if not is_group(group):
        return False

    # A NXroot is supposed to be at the root of a data file, and @NX_class
    # is therefore optional. We accept groups that are not located at the root
    # if they have @NX_class=NXroot (use case: several nexus files archived
    # in a single HDF5 file)
    if get_attr_as_unicode(group, "NX_class") != "NXroot" and not is_file(group):
        return False

    default_nxentry_name = group.attrs.get("default")
    if default_nxentry_name is None or default_nxentry_name not in group:
        return False

    default_nxentry_group = group.get(default_nxentry_name)
    return is_NXentry_with_default_NXdata(default_nxentry_group,
                                          validate=validate)


def get_default(group, validate=True):
    """Return a :class:`NXdata` object corresponding to the default NXdata group
    in the group specified as parameter.

    This function can find the NXdata if the group is already a NXdata, or
    if it is a NXentry defining a default NXdata, or if it is a NXroot
    defining such a default valid NXentry.

    Return None if no valid NXdata could be found.

    :param group: h5py-like group following the Nexus specification
        (NXdata, NXentry or NXroot).
    :param bool validate: Set this to False if you are sure that group
        is valid NXdata (i.e. :func:`silx.io.nxdata.is_valid_nxdata(group)`
        returns True). Parameter provided for optimisation purposes.
    :return: :class:`NXdata` object or None
    :raise TypeError: if group is not a h5py-like group
    """
    if not is_group(group):
        raise TypeError("Provided parameter is not a h5py-like group")

    # Resolve the @default chain from the most deeply nested case
    # (NXroot -> NXentry -> NXdata) down to a plain NXdata group.
    if is_NXroot_with_default_NXdata(group, validate=validate):
        default_entry = group[group.attrs["default"]]
        default_data = default_entry[default_entry.attrs["default"]]
    elif is_group_with_default_NXdata(group, validate=validate):
        default_data = group[group.attrs["default"]]
    elif not validate or is_valid_nxdata(group):
        default_data = group
    else:
        return None

    # already validated above, skip re-validation in the constructor
    return NXdata(default_data, validate=False)
diff --git a/src/silx/io/nxdata/write.py b/src/silx/io/nxdata/write.py
new file mode 100644
index 0000000..9e84240
--- /dev/null
+++ b/src/silx/io/nxdata/write.py
@@ -0,0 +1,202 @@
# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2017-2021 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/

import os
import logging

import h5py
import numpy

__authors__ = ["P. Knobel"]
__license__ = "MIT"
__date__ = "17/04/2018"


_logger = logging.getLogger(__name__)


def _str_to_utf8(text):
    # Wrap text in a numpy array with h5py's variable-length str dtype, so
    # h5py stores it as an HDF5 variable-length UTF-8 string.
    return numpy.array(text, dtype=h5py.special_dtype(vlen=str))


def save_NXdata(filename, signal, axes=None,
                signal_name="data", axes_names=None,
                signal_long_name=None, axes_long_names=None,
                signal_errors=None, axes_errors=None,
                title=None, interpretation=None,
                nxentry_name="entry", nxdata_name=None):
    """Write data to an NXdata group.

    .. note::

        No consistency checks are made regarding the dimensionality of the
        signal and number of axes. The user is responsible for providing
        meaningful data, that can be interpreted by visualization software.

    :param str filename: Path to output file. If the file does not
        exists, it is created.
    :param numpy.ndarray signal: Signal array.
    :param List[numpy.ndarray] axes: List of axes arrays.
    :param str signal_name: Name of signal dataset, in output file
    :param List[str] axes_names: List of dataset names for axes, in
        output file
    :param str signal_long_name: *@long_name* attribute for signal, or None.
    :param axes_long_names: None, or list of long names
        for axes
    :type axes_long_names: List[str, None]
    :param numpy.ndarray signal_errors: Array of errors associated with the
        signal
    :param axes_errors: List of arrays of errors
        associated with each axis
    :type axes_errors: List[numpy.ndarray, None]
    :param str title: Graph title (saved as a "title" dataset) or None.
    :param str interpretation: *@interpretation* attribute ("spectrum",
        "image", "rgba-image" or None). This is only needed in cases of
        ambiguous dimensionality, e.g. a 3D array which represents a RGBA
        image rather than a stack.
    :param str nxentry_name: Name of group in which the NXdata group
        is created. By default, "/entry" is used.

    .. note::

        The Nexus format specification requires for NXdata groups
        be part of a NXentry group.
        The specified group should have attribute *@NX_class=NXentry*, in
        order for the created file to be nexus compliant.
    :param str nxdata_name: Name of NXdata group. If omitted (None), the
        function creates a new group using the first available name ("data0",
        or "data1"...).
        Overwriting an existing group (or dataset) is not supported, you must
        delete it yourself prior to calling this function if this is what you
        want.
    :return: True if save was successful, else False.
    """
    if h5py is None:
        # NOTE(review): unreachable with the unconditional `import h5py`
        # above — a missing h5py already raises at module import time.
        raise ImportError("h5py could not be imported, but is required by "
                          "save_NXdata function")

    if axes_names is not None:
        assert axes is not None, "Axes names defined, but missing axes arrays"
        assert len(axes) == len(axes_names), \
            "Mismatch between number of axes and axes_names"

    if axes is not None and axes_names is None:
        # generate default names "dim0", "dim1"..., "." for missing axes
        axes_names = []
        for i, axis in enumerate(axes):
            axes_names.append("dim%d" % i if axis is not None else ".")
    if axes is None:
        axes = []

    # Open file in append mode if it exists, else create it ("w-" never
    # clobbers an existing file)
    if os.path.exists(filename):
        errmsg = "Cannot write/append to existing path %s"
        if not os.path.isfile(filename):
            errmsg += " (not a file)"
            _logger.error(errmsg, filename)
            return False
        if not os.access(filename, os.W_OK):
            errmsg += " (no permission to write)"
            _logger.error(errmsg, filename)
            return False
        mode = "r+"
    else:
        mode = "w-"

    with h5py.File(filename, mode=mode) as h5f:
        # get or create entry
        if nxentry_name is not None:
            entry = h5f.require_group(nxentry_name)
            if "default" not in h5f.attrs:
                # set this entry as default
                h5f.attrs["default"] = _str_to_utf8(nxentry_name)
            if "NX_class" not in entry.attrs:
                entry.attrs["NX_class"] = u"NXentry"
        else:
            # write NXdata into the root of the file (invalid nexus!)
            entry = h5f

        # Create NXdata group
        if nxdata_name is not None:
            if nxdata_name in entry:
                _logger.error("Cannot assign an NXdata group to an existing"
                              " group or dataset")
                return False
        else:
            # no name specified, take one that is available
            nxdata_name = "data0"
            i = 1
            while nxdata_name in entry:
                _logger.info("%s item already exists in NXentry group," +
                             " trying %s", nxdata_name, "data%d" % i)
                nxdata_name = "data%d" % i
                i += 1

        data_group = entry.create_group(nxdata_name)
        data_group.attrs["NX_class"] = u"NXdata"
        data_group.attrs["signal"] = _str_to_utf8(signal_name)
        if axes:
            data_group.attrs["axes"] = _str_to_utf8(axes_names)
        if title:
            # not in NXdata spec, but implemented by nexpy
            data_group["title"] = title
            # better way imho
            data_group.attrs["title"] = _str_to_utf8(title)

        signal_dataset = data_group.create_dataset(signal_name,
                                                   data=signal)
        if signal_long_name:
            signal_dataset.attrs["long_name"] = _str_to_utf8(signal_long_name)
        if interpretation:
            signal_dataset.attrs["interpretation"] = _str_to_utf8(interpretation)

        for i, axis_array in enumerate(axes):
            if axis_array is None:
                assert axes_names[i] in [".", None], \
                    "Axis name defined for dim %d but no axis array" % i
                continue
            axis_dataset = data_group.create_dataset(axes_names[i],
                                                     data=axis_array)
            if axes_long_names is not None:
                axis_dataset.attrs["long_name"] = _str_to_utf8(axes_long_names[i])

        if signal_errors is not None:
            data_group.create_dataset("errors",
                                      data=signal_errors)

        if axes_errors is not None:
            assert isinstance(axes_errors, (list, tuple)), \
                "axes_errors must be a list or a tuple of ndarray or None"
            assert len(axes_errors) == len(axes_names), \
                "Mismatch between number of axes_errors and axes_names"
            for i, axis_errors in enumerate(axes_errors):
                if axis_errors is not None:
                    dsname = axes_names[i] + "_errors"
                    data_group.create_dataset(dsname,
                                              data=axis_errors)
        if "default" not in entry.attrs:
            # set this NXdata as default
            entry.attrs["default"] = nxdata_name

    return True
diff --git a/src/silx/io/octaveh5.py b/src/silx/io/octaveh5.py
new file mode 100644
index 0000000..84fa726
--- /dev/null
+++ b/src/silx/io/octaveh5.py
@@ -0,0 +1,171 @@
# coding: utf-8
# /*##########################################################################
# Copyright (C) 2016-2020 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""
Python h5 module and octave h5 module have different ways to deal with
h5 files.
This module is used to make the link between octave and python using such files.
(python is using a dictionary and octave a struct )

This module provides tool to set HDF5 file for fasttomo input.

Here is an example of a simple read and write :

..
class Octaveh5(object):
    """Communication between Octave and Python through HDF5 files.

    Octave stores a struct in HDF5 as a group containing a ``type`` dataset
    and a ``value`` group with one sub-group per field (each field again
    holding a ``type`` and a ``value`` dataset). This class reads such a
    layout into a Python dict and writes a dict back using the same layout.
    """

    def __init__(self, octave_targetted_version=3.8):
        """Constructor

        :param octave_targetted_version: the version of Octave for which we
            want to write this hdf5 file.

            This is needed because for old Octave versions we need to add a
            hack (one extra character appended to each string value).
        """
        # h5py.File handle, set by :meth:`open`, None when no file is open
        self.file = None
        self.octave_targetted_version = octave_targetted_version

    def open(self, h5file, mode='r'):
        """Open the h5 file which has been written by octave

        :param h5file: The path of the file to read
        :param mode: the opening mode of the file: 'r', 'w'...
        :return: self, so calls can be chained (``Octaveh5().open(...)``)
        :raises IOError: if the file cannot be opened or created
        """
        try:
            self.file = h5py.File(h5file, mode)
            return self
        except IOError:
            if mode == 'a':
                reason = "\n %s: Can t find or create " % h5file
            else:
                reason = "\n %s: File not found" % h5file
            self.file = None

            logger.info(reason)
            # bare `raise` keeps the original traceback intact
            # (the previous `raise e` re-raised from the current frame)
            raise

    def get(self, struct_name):
        """Read octave equivalent structures in hdf5 file

        :param struct_name: the identification of the top level identity we
            want to get from an hdf5 structure
        :return: the dictionary of the requested struct, or None if it
            cannot be found
        """
        if self.file is None:
            logger.info("No file currently open")
            return None

        data_dict = {}
        # Octave layout: struct_name/{type, value}; the fields live under
        # the second item ("value" group)
        grr = list(self.file[struct_name].items())[1][1]
        try:
            gr_level2 = grr.items()
        except AttributeError:
            logger.info("no gr_level2")
            return None

        for key, val in gr_level2:
            key = str(key)
            # each field is a group holding a "type" and a "value" dataset
            field_items = list(val.items())
            field_type = field_items[0][1][()]
            field_value = field_items[1][1][()]

            if field_type != np.bytes_('sq_string'):
                # non-string fields are Octave scalars
                data_dict[key] = float(field_value)
            else:
                # in the case the string has been stored as an nd-array
                # of char
                if isinstance(field_value, np.ndarray):
                    field_value = "".join(chr(item) for item in field_value)
                else:
                    field_value = field_value.decode('UTF-8')

                # In the case Octave has added an extra character at the end
                if self.octave_targetted_version < 3.8:
                    field_value = field_value[:-1]
                data_dict[key] = field_value

        return data_dict

    def write(self, struct_name, data_dict):
        """write data_dict under the group struct_name in the open hdf5 file

        :param struct_name: the identification of the structure to write in
            the hdf5
        :param data_dict: The python dictionary containing the information
            to write
        """
        if self.file is None:
            logger.info("No file currently open")
            return

        group_l1 = self.file.create_group(struct_name)
        group_l1.attrs['OCTAVE_GLOBAL'] = np.uint8(1)
        group_l1.attrs['OCTAVE_NEW_FORMAT'] = np.uint8(1)
        group_l1.create_dataset("type", data=np.bytes_('scalar struct'), dtype="|S14")
        group_l2 = group_l1.create_group('value')
        for ftparams in data_dict:
            group_l3 = group_l2.create_group(ftparams)
            group_l3.attrs['OCTAVE_NEW_FORMAT'] = np.uint8(1)
            if isinstance(data_dict[ftparams], str):
                group_l3.create_dataset("type", (), data=np.bytes_('sq_string'), dtype="|S10")
                if self.octave_targetted_version < 3.8:
                    # hack for old Octave: one extra trailing character
                    group_l3.create_dataset("value", data=np.bytes_(data_dict[ftparams] + '0'))
                else:
                    group_l3.create_dataset("value", data=np.bytes_(data_dict[ftparams]))
            else:
                group_l3.create_dataset("type", (), data=np.bytes_('scalar'), dtype="|S7")
                group_l3.create_dataset("value", data=data_dict[ftparams])

    def close(self):
        """Close the file after calling read function
        """
        if self.file:
            self.file.close()

    def __del__(self):
        """Destructor
        """
        self.close()
"""
This module contains wrappers from file formats to h5py. The exposed layout
is as close as possible to the original file format.
"""
import logging

import numpy

from . import commonh5

__authors__ = ["V. Valls"]
__license__ = "MIT"
__date__ = "21/09/2017"


_logger = logging.getLogger(__name__)


class _FreeDataset(commonh5.Dataset):
    """Dataset accepting dtypes that h5py would reject.

    The type constraint is relaxed because this dataset can hold more types
    than h5py supports, and it is not supposed to be converted into h5py.
    """

    def _check_data(self, data):
        # only warn: the data is exposed anyway, it just cannot be
        # converted to an h5py dataset
        if data.dtype.char in ("U", "O"):
            _logger.warning(
                "Dataset '%s' uses an unsupported type '%s'.",
                self.name, data.dtype)


class NumpyFile(commonh5.File):
    """
    Expose a numpy file `npy`, or `npz` as an h5py.File-like.

    :param str name: Filename to load
    """

    def __init__(self, name=None):
        commonh5.File.__init__(self, name=name, mode="w")
        content = numpy.load(name)
        if not hasattr(content, "close"):
            # npy file (created using numpy.save): single anonymous array
            self.add_node(_FreeDataset("data", data=content))
        else:
            # npz archive (numpy.savez, numpy.savez_compressed):
            # one dataset per stored array
            for key, array in content.items():
                self[key] = _FreeDataset(None, data=array)
            content.close()
__authors__ = ["P. Knobel", "V.A. Sole"]
__license__ = "MIT"
__date__ = "03/10/2016"

import os
import sys

from numpy.distutils.misc_util import Configuration


# Locale and platform management
SPECFILE_USE_GNU_SOURCE = os.getenv("SPECFILE_USE_GNU_SOURCE")
if SPECFILE_USE_GNU_SOURCE is None:
    SPECFILE_USE_GNU_SOURCE = 0
    if sys.platform.lower().startswith("linux"):
        print(os.linesep.join((
            "silx.io.specfile WARNING:",
            "A cleaner locale independent implementation",
            "may be achieved setting SPECFILE_USE_GNU_SOURCE to 1",
            "For instance running this script as:",
            "SPECFILE_USE_GNU_SOURCE=1 python setup.py build")))
else:
    SPECFILE_USE_GNU_SOURCE = int(SPECFILE_USE_GNU_SOURCE)

# Platform-dependent macros for the specfile C extension
if sys.platform == "win32":
    define_macros = [('WIN32', None), ('SPECFILE_POSIX', None)]
elif os.name.lower().startswith('posix'):
    # the best choice is to have _GNU_SOURCE defined as a compilation flag
    # because that allows the use of strtod_l
    define_macros = ([('_GNU_SOURCE', 1)] if SPECFILE_USE_GNU_SOURCE
                     else [('SPECFILE_POSIX', None)])
else:
    define_macros = []


def configuration(parent_package='', top_path=None):
    """Build the numpy.distutils configuration of the silx.io package,
    including the compiled `specfile` extension.

    :return: the populated Configuration object
    """
    config = Configuration('io', parent_package, top_path)
    config.add_subpackage('test')
    config.add_subpackage('nxdata')

    c_names = ['sfheader', 'sfinit', 'sflists', 'sfdata', 'sfindex',
               'sflabel', 'sfmca', 'sftools', 'locale_management']
    sources = [os.path.join('specfile', 'src', name + '.c')
               for name in c_names]
    sources.append('specfile.pyx')

    config.add_extension('specfile',
                         sources=sources,
                         define_macros=define_macros,
                         include_dirs=[os.path.join('specfile', 'include')],
                         language='c')
    return config


if __name__ == "__main__":
    from numpy.distutils.core import setup

    setup(configuration=configuration)
/*########################################################################## +# Copyright (C) 2016-2018 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +""" +This module is a cython binding to wrap the C SpecFile library, to access +SpecFile data within a python program. + +Documentation for the original C library SpecFile can be found on the ESRF +website: +`The manual for the SpecFile Library <http://ftp.esrf.fr/pub/scisoft/silx/doc/SpecFileManual.pdf>`_ + +Examples +======== + +Start by importing :class:`SpecFile` and instantiate it: + +.. code-block:: python + + from silx.io.specfile import SpecFile + + sf = SpecFile("test.dat") + +A :class:`SpecFile` instance can be accessed like a dictionary to obtain a +:class:`Scan` instance. + +If the key is a string representing two values +separated by a dot (e.g. 
``"1.2"``), they will be treated as the scan number +(``#S`` header line) and the scan order:: + + # get second occurrence of scan "#S 1" + myscan = sf["1.2"] + + # access scan data as a numpy array + nlines, ncolumns = myscan.data.shape + +If the key is an integer, it will be treated as a 0-based index:: + + first_scan = sf[0] + second_scan = sf[1] + +It is also possible to browse through all scans using :class:`SpecFile` as +an iterator:: + + for scan in sf: + print(scan.scan_header_dict['S']) + +MCA spectra can be selectively loaded using an instance of :class:`MCA` +provided by :class:`Scan`:: + + # Only one MCA spectrum is loaded in memory + second_mca = first_scan.mca[1] + + # Iterating trough all MCA spectra in a scan: + for mca_data in first_scan.mca: + print(sum(mca_data)) + +Classes +======= + +- :class:`SpecFile` +- :class:`Scan` +- :class:`MCA` + +Exceptions +========== + +- :class:`SfError` +- :class:`SfErrMemoryAlloc` +- :class:`SfErrFileOpen` +- :class:`SfErrFileClose` +- :class:`SfErrFileRead` +- :class:`SfErrFileWrite` +- :class:`SfErrLineNotFound` +- :class:`SfErrScanNotFound` +- :class:`SfErrHeaderNotFound` +- :class:`SfErrLabelNotFound` +- :class:`SfErrMotorNotFound` +- :class:`SfErrPositionNotFound` +- :class:`SfErrLineEmpty` +- :class:`SfErrUserNotFound` +- :class:`SfErrColNotFound` +- :class:`SfErrMcaNotFound` + +""" + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "11/08/2017" + +import os.path +import logging +import numpy +import re +import sys + +_logger = logging.getLogger(__name__) + +cimport cython +from libc.stdlib cimport free + +cimport silx.io.specfile_wrapper as specfile_wrapper + + +SF_ERR_NO_ERRORS = 0 +SF_ERR_FILE_OPEN = 2 +SF_ERR_SCAN_NOT_FOUND = 7 + + +# custom errors +class SfError(Exception): + """Base exception inherited by all exceptions raised when a + C function from the legacy SpecFile library returns an error + code. 
class SfErrMemoryAlloc(SfError, MemoryError):
    """Memory allocation failed in the SpecFile library."""
    pass


class SfErrFileOpen(SfError, IOError):
    """The file could not be opened."""
    pass


class SfErrFileClose(SfError, IOError):
    """The file could not be closed."""
    pass


class SfErrFileRead(SfError, IOError):
    """Reading from the file failed."""
    pass


class SfErrFileWrite(SfError, IOError):
    """Writing to the file failed."""
    pass


class SfErrLineNotFound(SfError, KeyError):
    """The requested line does not exist."""
    pass


class SfErrScanNotFound(SfError, IndexError):
    """The requested scan does not exist."""
    pass


class SfErrHeaderNotFound(SfError, KeyError):
    """The requested header line does not exist."""
    pass


class SfErrLabelNotFound(SfError, KeyError):
    """The requested column label does not exist."""
    pass


class SfErrMotorNotFound(SfError, KeyError):
    """The requested motor does not exist."""
    pass


class SfErrPositionNotFound(SfError, KeyError):
    """The requested motor position does not exist."""
    pass


class SfErrLineEmpty(SfError, IOError):
    """The requested line is empty."""
    pass


class SfErrUserNotFound(SfError, KeyError):
    """The requested user header does not exist."""
    pass


class SfErrColNotFound(SfError, KeyError):
    """The requested data column does not exist."""
    pass


class SfErrMcaNotFound(SfError, IndexError):
    """The requested MCA spectrum does not exist."""
    pass


# Mapping from C SpecFile library error codes to exception classes
ERRORS = {
    1: SfErrMemoryAlloc,
    2: SfErrFileOpen,
    3: SfErrFileClose,
    4: SfErrFileRead,
    5: SfErrFileWrite,
    6: SfErrLineNotFound,
    7: SfErrScanNotFound,
    8: SfErrHeaderNotFound,
    9: SfErrLabelNotFound,
    10: SfErrMotorNotFound,
    11: SfErrPositionNotFound,
    12: SfErrLineEmpty,
    13: SfErrUserNotFound,
    14: SfErrColNotFound,
    15: SfErrMcaNotFound,
}


class SfNoMcaError(SfError):
    """Custom exception raised when ``SfNoMca()`` returns ``-1``
    """
    pass


class MCA(object):
    """Access to the Multi-Channel Analysis data of a scan.

    :param scan: Parent Scan instance
    :type scan: :class:`Scan`

    :var calibration: MCA calibration :math:`(a, b, c)` (as in
        :math:`a + b x + c x²`) from ``#@CALIB`` scan header.
    :type calibration: list of 3 floats, default ``[0., 1., 0.]``
    :var channels: MCA channels list from ``#@CHANN`` scan header.
        In the absence of a ``#@CHANN`` header, this attribute is a list
        ``[0, …, N-1]`` where ``N`` is the length of the first spectrum.
        In the absence of MCA spectra, this attribute defaults to ``None``.
    :type channels: list of int

    An :class:`MCA` instance can be indexed or iterated to obtain 1D numpy
    arrays, each representing a single MCA spectrum::

        sf = SpecFile("specfilename.dat")
        # explicit: mcas = MCA(Scan(sf, scan_index=2))
        # idiomatic: iterate over sf[2].mca
        for mca_data in sf[2].mca:
            ...  # do something with mca_data (1D numpy array)
    """
+ + To create a :class:`MCA` instance, you must provide a parent :class:`Scan` + instance, which in turn will provide a reference to the original + :class:`SpecFile` instance:: + + sf = SpecFile("/path/to/specfile.dat") + scan2 = Scan(sf, scan_index=2) + mcas_in_scan2 = MCA(scan2) + for i in len(mcas_in_scan2): + mca_data = mcas_in_scan2[i] + ... # do some something with mca_data (1D numpy array) + + A more pythonic way to do the same work, without having to explicitly + instantiate ``scan`` and ``mcas_in_scan``, would be:: + + sf = SpecFile("specfilename.dat") + # scan2 from previous example can be referred to as sf[2] + # mcas_in_scan2 from previous example can be referred to as scan2.mca + for mca_data in sf[2].mca: + ... # do some something with mca_data (1D numpy array) + + """ + def __init__(self, scan): + self._scan = scan + + # Header dict + self._header = scan.mca_header_dict + + self.calibration = [] + """List of lists of calibration values, + one list of 3 floats per MCA device or a single list applying to + all devices """ + self._parse_calibration() + + self.channels = [] + """List of lists of channels, + one list of integers per MCA device or a single list applying to + all devices""" + self._parse_channels() + + def _parse_channels(self): + """Fill :attr:`channels`""" + # Channels list + if "CHANN" in self._header: + chann_lines = self._header["CHANN"].split("\n") + all_chann_values = [chann_line.split() for chann_line in chann_lines] + for one_line_chann_values in all_chann_values: + length, start, stop, increment = map(int, one_line_chann_values) + self.channels.append(list(range(start, stop + 1, increment))) + elif len(self): + # in the absence of #@CHANN, use shape of first MCA + length = self[0].shape[0] + start, stop, increment = (0, length - 1, 1) + self.channels.append(list(range(start, stop + 1, increment))) + + def _parse_calibration(self): + """Fill :attr:`calibration`""" + # Channels list + if "CALIB" in self._header: + calib_lines = 
self._header["CALIB"].split("\n") + all_calib_values = [calib_line.split() for calib_line in calib_lines] + for one_line_calib_values in all_calib_values: + self.calibration.append(list(map(float, one_line_calib_values))) + else: + # in the absence of #@calib, use default + self.calibration.append([0., 1., 0.]) + + def __len__(self): + """ + + :return: Number of mca in Scan + :rtype: int + """ + return self._scan._specfile.number_of_mca(self._scan.index) + + def __getitem__(self, key): + """Return a single MCA data line + + :param key: 0-based index of MCA within Scan + :type key: int + + :return: Single MCA + :rtype: 1D numpy array + """ + if not len(self): + raise IndexError("No MCA spectrum found in this scan") + + if isinstance(key, (int, long)): + mca_index = key + # allow negative index, like lists + if mca_index < 0: + mca_index = len(self) + mca_index + else: + raise TypeError("MCA index should be an integer (%s provided)" % + (type(key))) + + if not 0 <= mca_index < len(self): + msg = "MCA index must be in range 0-%d" % (len(self) - 1) + raise IndexError(msg) + + return self._scan._specfile.get_mca(self._scan.index, + mca_index) + + def __iter__(self): + """Return the next MCA data line each time this method is called. + + :return: Single MCA + :rtype: 1D numpy array + """ + for mca_index in range(len(self)): + yield self._scan._specfile.get_mca(self._scan.index, mca_index) + + +def _add_or_concatenate(dictionary, key, value): + """If key doesn't exist in dictionary, create a new ``key: value`` pair. + Else append/concatenate the new value to the existing one + """ + try: + if key not in dictionary: + dictionary[key] = value + else: + dictionary[key] += "\n" + value + except TypeError: + raise TypeError("Parameter value must be a string.") + + +class Scan(object): + """ + + :param specfile: Parent SpecFile from which this scan is extracted. 
+ :type specfile: :class:`SpecFile` + :param scan_index: Unique index defining the scan in the SpecFile + :type scan_index: int + + Interface to access a SpecFile scan + + A scan is a block of descriptive header lines followed by a 2D data array. + + Following three ways of accessing a scan are equivalent:: + + sf = SpecFile("/path/to/specfile.dat") + + # Explicit class instantiation + scan2 = Scan(sf, scan_index=2) + + # 0-based index on a SpecFile object + scan2 = sf[2] + + # Using a "n.m" key (scan number starting with 1, scan order) + scan2 = sf["3.1"] + """ + def __init__(self, specfile, scan_index): + self._specfile = specfile + + self._index = scan_index + self._number = specfile.number(scan_index) + self._order = specfile.order(scan_index) + + self._scan_header_lines = self._specfile.scan_header(self._index) + self._file_header_lines = self._specfile.file_header(self._index) + + if self._file_header_lines == self._scan_header_lines: + self._file_header_lines = [] + self._header = self._file_header_lines + self._scan_header_lines + + self._scan_header_dict = {} + self._mca_header_dict = {} + for line in self._scan_header_lines: + match = re.search(r"#(\w+) *(.*)", line) + match_mca = re.search(r"#@(\w+) *(.*)", line) + if match: + hkey = match.group(1).lstrip("#").strip() + hvalue = match.group(2).strip() + _add_or_concatenate(self._scan_header_dict, hkey, hvalue) + elif match_mca: + hkey = match_mca.group(1).lstrip("#").strip() + hvalue = match_mca.group(2).strip() + _add_or_concatenate(self._mca_header_dict, hkey, hvalue) + else: + # this shouldn't happen + _logger.warning("Unable to parse scan header line " + line) + + self._labels = [] + if self.record_exists_in_hdr('L'): + try: + self._labels = self._specfile.labels(self._index) + except SfErrLineNotFound: + # SpecFile.labels raises an IndexError when encountering + # a Scan with no data, even if the header exists. + L_header = re.sub(r" {2,}", " ", # max. 
2 spaces + self._scan_header_dict["L"]) + self._labels = L_header.split(" ") + + self._file_header_dict = {} + for line in self._file_header_lines: + match = re.search(r"#(\w+) *(.*)", line) + if match: + # header type + hkey = match.group(1).lstrip("#").strip() + hvalue = match.group(2).strip() + _add_or_concatenate(self._file_header_dict, hkey, hvalue) + else: + _logger.warning("Unable to parse file header line " + line) + + self._motor_names = self._specfile.motor_names(self._index) + self._motor_positions = self._specfile.motor_positions(self._index) + + self._data = None + self._mca = None + + @cython.embedsignature(False) + @property + def index(self): + """Unique scan index 0 - len(specfile)-1 + + This attribute is implemented as a read-only property as changing + its value may cause nasty side-effects (such as loading data from a + different scan without updating the header accordingly.""" + return self._index + + @cython.embedsignature(False) + @property + def number(self): + """First value on #S line (as int)""" + return self._number + + @cython.embedsignature(False) + @property + def order(self): + """Order can be > 1 if the same number is repeated in a specfile""" + return self._order + + @cython.embedsignature(False) + @property + def header(self): + """List of raw header lines (as a list of strings). + + This includes the file header, the scan header and possibly a MCA + header. + """ + return self._header + + @cython.embedsignature(False) + @property + def scan_header(self): + """List of raw scan header lines (as a list of strings). + """ + return self._scan_header_lines + + @cython.embedsignature(False) + @property + def file_header(self): + """List of raw file header lines (as a list of strings). + """ + return self._file_header_lines + + @cython.embedsignature(False) + @property + def scan_header_dict(self): + """ + Dictionary of scan header strings, keys without the leading``#`` + (e.g. ``scan_header_dict["S"]``). 
+ Note: this does not include MCA header lines starting with ``#@``. + """ + return self._scan_header_dict + + @cython.embedsignature(False) + @property + def mca_header_dict(self): + """ + Dictionary of MCA header strings, keys without the leading ``#@`` + (e.g. ``mca_header_dict["CALIB"]``). + """ + return self._mca_header_dict + + @cython.embedsignature(False) + @property + def file_header_dict(self): + """ + Dictionary of file header strings, keys without the leading ``#`` + (e.g. ``file_header_dict["F"]``). + """ + return self._file_header_dict + + @cython.embedsignature(False) + @property + def labels(self): + """ + List of data column headers from ``#L`` scan header + """ + return self._labels + + @cython.embedsignature(False) + @property + def data(self): + """Scan data as a 2D numpy.ndarray with the usual attributes + (e.g. data.shape). + + The first index is the detector, the second index is the sample index. + """ + if self._data is None: + self._data = numpy.transpose(self._specfile.data(self._index)) + + return self._data + + @cython.embedsignature(False) + @property + def mca(self): + """MCA data in this scan. + + Each multichannel analysis is a 1D numpy array. Metadata about + MCA data is to be found in :py:attr:`mca_header`. + + :rtype: :class:`MCA` + """ + if self._mca is None: + self._mca = MCA(self) + return self._mca + + @cython.embedsignature(False) + @property + def motor_names(self): + """List of motor names from the ``#O`` file header line. + """ + return self._motor_names + + @cython.embedsignature(False) + @property + def motor_positions(self): + """List of motor positions as floats from the ``#P`` scan header line. + """ + return self._motor_positions + + def record_exists_in_hdr(self, record): + """Check whether a scan header line exists. + + This should be used before attempting to retrieve header information + using a C function that may crash with a *segmentation fault* if the + header isn't defined in the SpecFile. 
+ + :param record: single upper case letter corresponding to the + header you want to test (e.g. ``L`` for labels) + :type record: str + + :return: True or False + :rtype: boolean + """ + for line in self._header: + if line.startswith("#" + record): + return True + return False + + def data_line(self, line_index): + """Returns data for a given line of this scan. + + .. note:: + + A data line returned by this method, corresponds to a data line + in the original specfile (a series of data points, one per + detector). In the :attr:`data` array, this line index corresponds + to the index in the second dimension (~ column) of the array. + + :param line_index: Index of data line to retrieve (starting with 0) + :type line_index: int + + :return: Line data as a 1D array of doubles + :rtype: numpy.ndarray + """ + # attribute data corresponds to a transposed version of the original + # specfile data (where detectors correspond to columns) + return self.data[:, line_index] + + def data_column_by_name(self, label): + """Returns a data column + + :param label: Label of data column to retrieve, as defined on the + ``#L`` line of the scan header. + :type label: str + + :return: Line data as a 1D array of doubles + :rtype: numpy.ndarray + """ + try: + ret = self._specfile.data_column_by_name(self._index, label) + except SfErrLineNotFound: + # Could be a "#C Scan aborted after 0 points" + _logger.warning("Cannot get data column %s in scan %d.%d", + label, self.number, self.order) + ret = numpy.empty((0, ), numpy.double) + return ret + + def motor_position_by_name(self, name): + """Returns the position for a given motor + + :param name: Name of motor, as defined on the ``#O`` line of the + file header. 
+ :type name: str + + :return: Motor position + :rtype: float + """ + return self._specfile.motor_position_by_name(self._index, name) + + +def _string_to_char_star(string_): + """Convert a string to ASCII encoded bytes when using python3""" + if sys.version_info[0] >= 3 and not isinstance(string_, bytes): + return bytes(string_, "ascii") + return string_ + + +def is_specfile(filename): + """Test if a file is a SPEC file, by checking if one of the first two + lines starts with *#F* (SPEC file header) or *#S* (scan header). + + :param str filename: File path + :return: *True* if file is a SPEC file, *False* if it is not a SPEC file + :rtype: bool + """ + if not os.path.isfile(filename): + return False + # test for presence of #S or #F in first 10 lines + with open(filename, "rb") as f: + chunk = f.read(2500) + for i, line in enumerate(chunk.split(b"\n")): + if line.startswith(b"#S ") or line.startswith(b"#F "): + return True + if i >= 10: + break + return False + + +cdef class SpecFile(object): + """ + + :param filename: Path of the SpecFile to read + + This class wraps the main data and header access functions of the C + SpecFile library. 
+ """ + + cdef: + specfile_wrapper.SpecFileHandle *handle + str filename + + def __cinit__(self, filename): + cdef int error = 0 + self.handle = NULL + + if is_specfile(filename): + filename = _string_to_char_star(filename) + self.handle = specfile_wrapper.SfOpen(filename, &error) + if error: + self._handle_error(error) + else: + # handle_error takes care of raising the correct error, + # this causes the destructor to be called + self._handle_error(SF_ERR_FILE_OPEN) + + def __init__(self, filename): + if not isinstance(filename, str): + # decode bytes to str in python 3, str to unicode in python 2 + self.filename = filename.decode() + else: + self.filename = filename + + def __dealloc__(self): + """Destructor: Calls SfClose(self.handle)""" + self.close() + + def close(self): + """Close the file descriptor""" + # handle is NULL if SfOpen failed + if self.handle: + if specfile_wrapper.SfClose(self.handle): + _logger.warning("Error while closing SpecFile") + self.handle = NULL + + def __len__(self): + """Return the number of scans in the SpecFile + """ + return specfile_wrapper.SfScanNo(self.handle) + + def __iter__(self): + """Return the next :class:`Scan` in a SpecFile each time this method + is called. + + This usually happens when the python built-in function ``next()`` is + called with a :class:`SpecFile` instance as a parameter, or when a + :class:`SpecFile` instance is used as an iterator (e.g. in a ``for`` + loop). + """ + for scan_index in range(len(self)): + yield Scan(self, scan_index) + + def __getitem__(self, key): + """Return a :class:`Scan` object. + + This special method is called when a :class:`SpecFile` instance is + accessed as a dictionary (e.g. ``sf[key]``). 
+ + :param key: 0-based scan index or ``"n.m"`` key, where ``n`` is the scan + number defined on the ``#S`` header line and ``m`` is the order + :type key: int or str + + :return: Scan defined by its 0-based index or its ``"n.m"`` key + :rtype: :class:`Scan` + """ + msg = "The scan identification key can be an integer representing " + msg += "the unique scan index or a string 'N.M' with N being the scan" + msg += " number and M the order (eg '2.3')." + + if isinstance(key, int): + scan_index = key + # allow negative index, like lists + if scan_index < 0: + scan_index = len(self) + scan_index + else: + try: + (number, order) = map(int, key.split(".")) + scan_index = self.index(number, order) + except (ValueError, SfErrScanNotFound, KeyError): + # int() can raise a value error + raise KeyError(msg + "\nValid keys: '" + + "', '".join(self.keys()) + "'") + except AttributeError: + # e.g. "AttrErr: 'float' object has no attribute 'split'" + raise TypeError(msg) + + if not 0 <= scan_index < len(self): + msg = "Scan index must be in range 0-%d" % (len(self) - 1) + raise IndexError(msg) + + return Scan(self, scan_index) + + def keys(self): + """Returns list of scan keys (eg ``['1.1', '2.1',...]``). + + :return: list of scan keys + :rtype: list of strings + """ + ret_list = [] + list_of_numbers = self._list() + count = {} + + for number in list_of_numbers: + if number not in count: + count[number] = 1 + else: + count[number] += 1 + ret_list.append(u'%d.%d' % (number, count[number])) + + return ret_list + + def __contains__(self, key): + """Return ``True`` if ``key`` is a valid scan key. + Valid keys can be a string such as ``"1.1"`` or a 0-based scan index. + """ + return key in (self.keys() + list(range(len(self)))) + + def _get_error_string(self, error_code): + """Returns the error message corresponding to the error code. 
+ + :param code: Error code + :type code: int + :return: Human readable error message + :rtype: str + """ + return (<bytes> specfile_wrapper.SfError(error_code)).decode() + + def _handle_error(self, error_code): + """Inspect error code, raise adequate error type if necessary. + + :param code: Error code + :type code: int + """ + error_message = self._get_error_string(error_code) + if error_code in ERRORS: + raise ERRORS[error_code](error_message) + + def index(self, scan_number, scan_order=1): + """Returns scan index from scan number and order. + + :param scan_number: Scan number (possibly non-unique). + :type scan_number: int + :param scan_order: Scan order. + :type scan_order: int default 1 + + :return: Unique scan index + :rtype: int + + + Scan indices are increasing from ``0`` to ``len(self)-1`` in the + order in which they appear in the file. + Scan numbers are defined by users and are not necessarily unique. + The scan order for a given scan number increments each time the scan + number appears in a given file. + """ + idx = specfile_wrapper.SfIndex(self.handle, scan_number, scan_order) + if idx == -1: + self._handle_error(SF_ERR_SCAN_NOT_FOUND) + return idx - 1 + + def number(self, scan_index): + """Returns scan number from scan index. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: User defined scan number. + :rtype: int + """ + idx = specfile_wrapper.SfNumber(self.handle, scan_index + 1) + if idx == -1: + self._handle_error(SF_ERR_SCAN_NOT_FOUND) + return idx + + def order(self, scan_index): + """Returns scan order from scan index. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: Scan order (sequential number incrementing each time a + non-unique occurrence of a scan number is encountered). 
+ :rtype: int + """ + ordr = specfile_wrapper.SfOrder(self.handle, scan_index + 1) + if ordr == -1: + self._handle_error(SF_ERR_SCAN_NOT_FOUND) + return ordr + + def _list(self): + """see documentation of :meth:`list` + """ + cdef: + long *scan_numbers + int error = SF_ERR_NO_ERRORS + + scan_numbers = specfile_wrapper.SfList(self.handle, &error) + self._handle_error(error) + + ret_list = [] + for i in range(len(self)): + ret_list.append(scan_numbers[i]) + + free(scan_numbers) + return ret_list + + def list(self): + """Returns list (1D numpy array) of scan numbers in SpecFile. + + :return: list of scan numbers (from `` #S`` lines) in the same order + as in the original SpecFile (e.g ``[1, 1, 2, 3, …]``). + :rtype: numpy array + """ + # this method is overloaded in specfilewrapper to output a string + # representation of the list + return self._list() + + def data(self, scan_index): + """Returns data for the specified scan index. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: Complete scan data as a 2D array of doubles + :rtype: numpy.ndarray + """ + cdef: + double** mydata + long* data_info + int i, j + int error = SF_ERR_NO_ERRORS + long nlines, ncolumns, regular + double[:, :] ret_array + + sfdata_error = specfile_wrapper.SfData(self.handle, + scan_index + 1, + &mydata, + &data_info, + &error) + if sfdata_error == -1 and not error: + # this has happened in some situations with empty scans (#1759) + _logger.warning("SfData returned -1 without an error." 
+ " Assuming aborted scan.") + + self._handle_error(error) + + if <long>data_info != 0: + nlines = data_info[0] + ncolumns = data_info[1] + regular = data_info[2] + else: + nlines = 0 + ncolumns = 0 + regular = 0 + + ret_array = numpy.empty((nlines, ncolumns), dtype=numpy.double) + + for i in range(nlines): + for j in range(ncolumns): + ret_array[i, j] = mydata[i][j] + + specfile_wrapper.freeArrNZ(<void ***>&mydata, nlines) + free(data_info) + return numpy.asarray(ret_array) + + def data_column_by_name(self, scan_index, label): + """Returns data column for the specified scan index and column label. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + :param label: Label of data column, as defined in the ``#L`` line + of the scan header. + :type label: str + + :return: Data column as a 1D array of doubles + :rtype: numpy.ndarray + """ + cdef: + double* data_column + long i, nlines + int error = SF_ERR_NO_ERRORS + double[:] ret_array + + label = _string_to_char_star(label) + + nlines = specfile_wrapper.SfDataColByName(self.handle, + scan_index + 1, + label, + &data_column, + &error) + self._handle_error(error) + + if nlines == -1: + # this can happen on empty scans in some situations (see #1759) + _logger.warning("SfDataColByName returned -1 without an error." + " Assuming aborted scan.") + nlines = 0 + + ret_array = numpy.empty((nlines,), dtype=numpy.double) + + for i in range(nlines): + ret_array[i] = data_column[i] + + free(data_column) + return numpy.asarray(ret_array) + + def scan_header(self, scan_index): + """Return list of scan header lines. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. 
+ :type scan_index: int + + :return: List of raw scan header lines + :rtype: list of str + """ + cdef: + char** lines + int error = SF_ERR_NO_ERRORS + + nlines = specfile_wrapper.SfHeader(self.handle, + scan_index + 1, + "", # no pattern matching + &lines, + &error) + + self._handle_error(error) + + lines_list = [] + for i in range(nlines): + line = <bytes>lines[i].decode() + lines_list.append(line) + + specfile_wrapper.freeArrNZ(<void***>&lines, nlines) + return lines_list + + def file_header(self, scan_index=0): + """Return list of file header lines. + + A file header contains all lines between a ``#F`` header line and + a ``#S`` header line (start of scan). We need to specify a scan + number because there can be more than one file header in a given file. + A file header applies to all subsequent scans, until a new file + header is defined. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: List of raw file header lines + :rtype: list of str + """ + cdef: + char** lines + int error = SF_ERR_NO_ERRORS + + nlines = specfile_wrapper.SfFileHeader(self.handle, + scan_index + 1, + "", # no pattern matching + &lines, + &error) + self._handle_error(error) + + lines_list = [] + for i in range(nlines): + line = <bytes>lines[i].decode() + lines_list.append(line) + + specfile_wrapper.freeArrNZ(<void***>&lines, nlines) + return lines_list + + def columns(self, scan_index): + """Return number of columns in a scan from the ``#N`` header line + (without ``#N`` and scan number) + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. 
+ :type scan_index: int + + :return: Number of columns in scan from ``#N`` line + :rtype: int + """ + cdef: + int error = SF_ERR_NO_ERRORS + + ncolumns = specfile_wrapper.SfNoColumns(self.handle, + scan_index + 1, + &error) + self._handle_error(error) + + return ncolumns + + def command(self, scan_index): + """Return ``#S`` line (without ``#S`` and scan number) + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: S line + :rtype: str + """ + cdef: + int error = SF_ERR_NO_ERRORS + + s_record = <bytes> specfile_wrapper.SfCommand(self.handle, + scan_index + 1, + &error) + self._handle_error(error) + + return s_record.decode() + + def date(self, scan_index=0): + """Return date from ``#D`` line + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: Date from ``#D`` line + :rtype: str + """ + cdef: + int error = SF_ERR_NO_ERRORS + + d_line = <bytes> specfile_wrapper.SfDate(self.handle, + scan_index + 1, + &error) + self._handle_error(error) + + return d_line.decode() + + def labels(self, scan_index): + """Return all labels from ``#L`` line + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: All labels from ``#L`` line + :rtype: list of strings + """ + cdef: + char** all_labels + int error = SF_ERR_NO_ERRORS + + nlabels = specfile_wrapper.SfAllLabels(self.handle, + scan_index + 1, + &all_labels, + &error) + self._handle_error(error) + + labels_list = [] + for i in range(nlabels): + labels_list.append(<bytes>all_labels[i].decode()) + + specfile_wrapper.freeArrNZ(<void***>&all_labels, nlabels) + return labels_list + + def motor_names(self, scan_index=0): + """Return all motor names from ``#O`` lines + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``.If not specified, defaults to 0 (meaning the + function returns motors names associated with the first scan). 
+ This parameter makes a difference only if there are more than + on file header in the file, in which case the file header applies + to all following scans until a new file header appears. + :type scan_index: int + + :return: All motor names + :rtype: list of strings + """ + cdef: + char** all_motors + int error = SF_ERR_NO_ERRORS + + nmotors = specfile_wrapper.SfAllMotors(self.handle, + scan_index + 1, + &all_motors, + &error) + self._handle_error(error) + + motors_list = [] + for i in range(nmotors): + motors_list.append(<bytes>all_motors[i].decode()) + + specfile_wrapper.freeArrNZ(<void***>&all_motors, nmotors) + return motors_list + + def motor_positions(self, scan_index): + """Return all motor positions + + :param scan_index: Unique scan index between ``0`` + and ``len(self)-1``. + :type scan_index: int + + :return: All motor positions + :rtype: list of double + """ + cdef: + double* motor_positions + int error = SF_ERR_NO_ERRORS + + nmotors = specfile_wrapper.SfAllMotorPos(self.handle, + scan_index + 1, + &motor_positions, + &error) + self._handle_error(error) + + motor_positions_list = [] + for i in range(nmotors): + motor_positions_list.append(motor_positions[i]) + + free(motor_positions) + return motor_positions_list + + def motor_position_by_name(self, scan_index, name): + """Return motor position + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: Specified motor position + :rtype: double + """ + cdef: + int error = SF_ERR_NO_ERRORS + + name = _string_to_char_star(name) + + motor_position = specfile_wrapper.SfMotorPosByName(self.handle, + scan_index + 1, + name, + &error) + self._handle_error(error) + + return motor_position + + def number_of_mca(self, scan_index): + """Return number of mca spectra in a scan. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: Number of mca spectra. 
+ :rtype: int + """ + cdef: + int error = SF_ERR_NO_ERRORS + + num_mca = specfile_wrapper.SfNoMca(self.handle, + scan_index + 1, + &error) + # error code updating isn't implemented in SfNoMCA + if num_mca == -1: + raise SfNoMcaError("Failed to retrieve number of MCA " + + "(SfNoMca returned -1)") + return num_mca + + def mca_calibration(self, scan_index): + """Return MCA calibration in the form :math:`a + b x + c x²` + + Raise a KeyError if there is no ``@CALIB`` line in the scan header. + + :param scan_index: Unique scan index between ``0`` and + ``len(self)-1``. + :type scan_index: int + + :return: MCA calibration as a list of 3 values :math:`(a, b, c)` + :rtype: list of floats + """ + cdef: + int error = SF_ERR_NO_ERRORS + double* mca_calib + + mca_calib_error = specfile_wrapper.SfMcaCalib(self.handle, + scan_index + 1, + &mca_calib, + &error) + + # error code updating isn't implemented in SfMcaCalib + if mca_calib_error: + raise KeyError("MCA calibration line (@CALIB) not found") + + mca_calib_list = [] + for i in range(3): + mca_calib_list.append(mca_calib[i]) + + free(mca_calib) + return mca_calib_list + + def get_mca(self, scan_index, mca_index): + """Return one MCA spectrum + + :param scan_index: Unique scan index between ``0`` and ``len(self)-1``. 
+ :type scan_index: int + :param mca_index: Index of MCA in the scan + :type mca_index: int + + :return: MCA spectrum + :rtype: 1D numpy array + """ + cdef: + int error = SF_ERR_NO_ERRORS + double* mca_data + long len_mca + double[:] ret_array + + len_mca = specfile_wrapper.SfGetMca(self.handle, + scan_index + 1, + mca_index + 1, + &mca_data, + &error) + self._handle_error(error) + + ret_array = numpy.empty((len_mca,), dtype=numpy.double) + + for i in range(len_mca): + ret_array[i] = mca_data[i] + + free(mca_data) + return numpy.asarray(ret_array) diff --git a/src/silx/io/specfile/include/Lists.h b/src/silx/io/specfile/include/Lists.h new file mode 100644 index 0000000..01164fb --- /dev/null +++ b/src/silx/io/specfile/include/Lists.h @@ -0,0 +1,56 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ +/*************************************************************************** + * + * File: Lists.h + * + * Description: Include file for dealing with lists. + * + * Author: Vicente Rey + * + * Created: 22 May 1995 + * + * (copyright by E.S.R.F. March 1995) + * + ***************************************************************************/ +#ifndef LISTS_H +#define LISTS_H + +/* #include <malloc.h> */ + +typedef struct _ObjectList { + struct _ObjectList *next; + struct _ObjectList *prev; + void *contents; +} ObjectList; + +typedef struct _ListHeader { + struct _ObjectList *first; + struct _ObjectList *last; +} ListHeader; + +extern ObjectList * findInList ( ListHeader *list, int (*proc)(void *,void *), void *value ); +extern long addToList ( ListHeader *list, void *object,long size); +extern void unlinkFromList ( ListHeader *list, ObjectList *element); + +#endif /* LISTS_H */ diff --git a/src/silx/io/specfile/include/SpecFile.h b/src/silx/io/specfile/include/SpecFile.h new file mode 100644 index 0000000..9456e3f --- /dev/null +++ b/src/silx/io/specfile/include/SpecFile.h @@ -0,0 +1,297 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +/*************************************************************************** + * + * File: SpecFile.h + * + * Description: Include file for treating spec data files. + * + * Author: Vicente Rey + * + * Created: 2 March 1995 + * + * (copyright by E.S.R.F. March 1995) + * + ***************************************************************************/ +#ifndef SPECFILE_H +#define SPECFILE_H + +#include <math.h> +#include <stdio.h> +#include <fcntl.h> + +#include <time.h> +#include <stdlib.h> +/* #include <malloc.h> */ +#include <string.h> +#include <Lists.h> + +#ifdef _WINDOWS /* compiling on windows */ +#include <windows.h> +#include <io.h> +#define SF_OPENFLAG O_RDONLY | O_BINARY +#define SF_WRITEFLAG O_CREAT | O_WRONLY +#define SF_UMASK 0666 +#else /* if not windows */ +#define SF_OPENFLAG O_RDONLY +#define SF_WRITEFLAG O_CREAT | O_WRONLY +#define SF_UMASK 0666 +#endif + +#ifdef _GENLIB /* for windows dll generation */ +#define DllExport __declspec (dllexport) +#else +#define DllExport +#endif + + +#ifdef SUN4 +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + +/* + * Defines. + */ +#define ROW 0 /* data_info index for no. of data rows */ +#define COL 1 /* data_info index for no. 
of data columns*/ +#define REG 2 /* data_info index for regular */ + +#define H 0 +#define K 1 +#define L 2 +#define ABORTED -1 +#define NOT_ABORTED 0 + +#define SF_ERR_NO_ERRORS 0 +#define SF_ERR_MEMORY_ALLOC 1 +#define SF_ERR_FILE_OPEN 2 +#define SF_ERR_FILE_CLOSE 3 +#define SF_ERR_FILE_READ 4 +#define SF_ERR_FILE_WRITE 5 +#define SF_ERR_LINE_NOT_FOUND 6 +#define SF_ERR_SCAN_NOT_FOUND 7 +#define SF_ERR_HEADER_NOT_FOUND 8 +#define SF_ERR_LABEL_NOT_FOUND 9 +#define SF_ERR_MOTOR_NOT_FOUND 10 +#define SF_ERR_POSITION_NOT_FOUND 11 +#define SF_ERR_LINE_EMPTY 12 +#define SF_ERR_USER_NOT_FOUND 13 +#define SF_ERR_COL_NOT_FOUND 14 +#define SF_ERR_MCA_NOT_FOUND 15 + +typedef struct _SfCursor { + long int scanno; /* nb of scans */ + long int cursor; /* beginning of current scan */ + long int hdafoffset; /* global offset of header after beginning of data */ + long int datalines; /* contains nb of data lines */ + long int dataoffset; /* contains data offset from begin of scan */ + long int mcaspectra; /* contains nb of mca spectra in scan */ + long int bytecnt; /* total file byte count */ + long int what; /* scan of file block */ + long int data; /* data flag */ + long int file_header; /* address of file header for this scan */ + long int fileh_size; /* size of it */ +} SfCursor; + + +typedef struct _SpecFile{ + int fd; + long m_time; + char *sfname; + struct _ListHeader list; + long int no_scans; + ObjectList *current; + char *scanbuffer; + long scanheadersize; + char *filebuffer; + long filebuffersize; + long scansize; + char **labels; + long int no_labels; + char **motor_names; + long int no_motor_names; + double *motor_pos; + long int no_motor_pos; + double **data; + long *data_info; + SfCursor cursor; + short updating; +} SpecFile; + +typedef struct _SpecFileOut{ + SpecFile *sf; + long *list; + long list_size; + long file_header; +} SpecFileOut; + +typedef struct _SpecScan { + long int index; + long int scan_no; + long int order; + long int offset; + long int size; + long 
int last; + long int file_header; + long int data_offset; + long int hdafter_offset; + long int mcaspectra; +} SpecScan; + +/* + * Function declarations. + */ + + /* + * Init + */ +/* + * init + */ +DllExport extern SpecFile *SfOpen ( char *name, int *error ); +DllExport extern short SfUpdate ( SpecFile *sf,int *error ); +DllExport extern int SfClose ( SpecFile *sf ); + +/* + * indexes + */ +DllExport extern long SfScanNo ( SpecFile *sf ); +DllExport extern long *SfList ( SpecFile *sf, int *error ); +DllExport extern long SfCondList ( SpecFile *sf, long cond, + long **scan_list, int *error ); +DllExport extern long SfIndex ( SpecFile *sf, long number, + long order ); +DllExport extern long SfIndexes ( SpecFile *sf, long number, + long **indexlist ); +DllExport extern long SfNumber ( SpecFile *sf, long index ); +DllExport extern long SfOrder ( SpecFile *sf, long index ); +DllExport extern int SfNumberOrder ( SpecFile *sf, long index, + long *number, long *order ); + + /* + * Header + */ +DllExport extern char *SfCommand ( SpecFile *sf, long index, int *error ); +DllExport extern long SfNoColumns ( SpecFile *sf, long index, int *error ); +DllExport extern char *SfDate ( SpecFile *sf, long index, int *error ); +DllExport extern long SfEpoch ( SpecFile *sf, long index, int *error ); +DllExport extern long SfNoHeaderBefore ( SpecFile *sf, long index, int *error ); +DllExport extern double *SfHKL ( SpecFile *sf, long index, int *error ); +DllExport extern long SfHeader ( SpecFile *sf, long index, char *string, + char ***lines, int *error ); +DllExport extern long SfGeometry ( SpecFile *sf, long index, + char ***lines, int *error ); +DllExport extern long SfFileHeader ( SpecFile *sf, long index, char *string, + char ***lines, int *error ); +DllExport extern char *SfFileDate ( SpecFile *sf, long index, int *error ); +DllExport extern char *SfUser ( SpecFile *sf, long index, int *error ); +DllExport extern char *SfTitle ( SpecFile *sf, long index, int *error ); + + /* + * 
Labels + */ +DllExport extern long SfAllLabels ( SpecFile *sf, long index, + char ***labels, int *error ); +DllExport extern char *SfLabel ( SpecFile *sf, long index, long column, + int *error ); + + /* + * Motors + */ +DllExport extern long SfAllMotors ( SpecFile *sf, long index, + char ***names, int *error ); +DllExport extern char * SfMotor ( SpecFile *sf, long index, + long number, int *error ); +DllExport extern long SfAllMotorPos ( SpecFile *sf, long index, + double **pos, int *error ); +DllExport extern double SfMotorPos ( SpecFile *sf, long index, + long number, int *error ); +DllExport extern double SfMotorPosByName ( SpecFile *sf, long index, + char *name, int *error ); + + /* + * Data + */ +DllExport extern long SfNoDataLines ( SpecFile *sf, long index, int *error ); +DllExport extern int SfData ( SpecFile *sf, long index, + double ***data, long **data_info, int *error ); +DllExport extern long SfDataAsString ( SpecFile *sf, long index, + char ***data, int *error ); +DllExport extern long SfDataLine ( SpecFile *sf, long index, long line, + double **data_line, int *error ); +DllExport extern long SfDataCol ( SpecFile *sf, long index, long col, + double **data_col, int *error ); +DllExport extern long SfDataColByName ( SpecFile *sf, long index, + char *label, double **data_col, int *error ); + + /* + * MCA functions + */ +DllExport extern long SfNoMca ( SpecFile *sf, long index, int *error ); +DllExport extern int SfGetMca ( SpecFile *sf, long index, long mcano, + double **retdata, int *error ); +DllExport extern long SfMcaCalib ( SpecFile *sf, long index, double **calib, + int *error ); + + /* + * Write and write related functions + */ +DllExport extern SpecFileOut *SfoInit ( SpecFile *sf, int *error ); +DllExport extern void SfoClose ( SpecFileOut *sfo ); +DllExport extern long SfoSelectAll ( SpecFileOut *sfo, int *error ); +DllExport extern long SfoSelectOne ( SpecFileOut *sfo, long index, + int *error ); +DllExport extern long SfoSelect ( SpecFileOut 
*sfo, long *list, + int *error ); +DllExport extern long SfoSelectRange ( SpecFileOut *sfo, long begin, + long end, int *error ); +DllExport extern long SfoRemoveOne ( SpecFileOut *sfo, long index, + int *error ); +DllExport extern long SfoRemove ( SpecFileOut *sfo, long *list, + int *error ); +DllExport extern long SfoRemoveRange ( SpecFileOut *sfo, long begin, + long end, int *error ); +DllExport extern long SfoRemoveAll ( SpecFileOut *sfo, int *error ); +DllExport extern long SfoWrite ( SpecFileOut *sfo, char *name, + int *error ); +DllExport extern long SfoGetList ( SpecFileOut *sfo, long **list, + int *error ); + /* + * Memory free functions + */ +DllExport extern void freeArrNZ ( void ***ptr, long no_lines ); +DllExport extern void freePtr ( void *ptr ); + + /* + * Sf Tools + */ +DllExport extern void SfShow ( SpecFile *sf ); +DllExport extern void SfShowScan ( SpecFile *sf ,long index); + /* + * Error + */ +DllExport extern char *SfError ( int code ); + +#endif /* SPECFILE_H */ diff --git a/src/silx/io/specfile/include/SpecFileCython.h b/src/silx/io/specfile/include/SpecFileCython.h new file mode 100644 index 0000000..3225e13 --- /dev/null +++ b/src/silx/io/specfile/include/SpecFileCython.h @@ -0,0 +1,28 @@ +#/*########################################################################## +# coding: utf-8 +# Copyright (C) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +#############################################################################*/ + +/* The original SpecFile.h has a `#define L 2` directive + that breaks cython lists and memory views. */ +#include "SpecFile.h" +#undef L diff --git a/src/silx/io/specfile/include/SpecFileP.h b/src/silx/io/specfile/include/SpecFileP.h new file mode 100644 index 0000000..97c3db6 --- /dev/null +++ b/src/silx/io/specfile/include/SpecFileP.h @@ -0,0 +1,79 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +/*************************************************************************** + * + * File: SpecFileP.h + * + * Description: Include file for treating spec data files. + * + * Author: Vicente Rey + * + * Created: 2 March 1995 + * + * (copyright by E.S.R.F. March 1995) + * + ***************************************************************************/ +#ifndef SPECFILE_P_H +#define SPECFILE_P_H + +/* + * Defines. + */ +#define FILE_HEADER 0 +#define SCAN 1 + +#define FROM_SCAN 0 +#define FROM_FILE 1 + +#define SF_COMMENT 'C' +#define SF_DATE 'D' +#define SF_EPOCH 'E' +#define SF_FILE_NAME 'F' +#define SF_GEOMETRY 'G' +#define SF_INTENSITY 'I' +#define SF_LABEL 'L' +#define SF_MON_NORM 'M' +#define SF_COLUMNS 'N' +#define SF_MOTOR_NAMES 'O' +#define SF_MOTOR_POSITIONS 'P' +#define SF_RECIP_SPACE 'Q' +#define SF_RESULTS 'R' +#define SF_SCAN_NUM 'S' +#define SF_TIME_NORM 'T' +#define SF_USER_DEFINED 'U' +#define SF_TEMPERATURE 'X' +#define SF_MCA_DATA '@' + +/* + * Library internal functions + */ +extern int sfSetCurrent ( SpecFile *sf, long index, int *error); +extern ObjectList *findScanByIndex ( ListHeader *list, long index ); +extern ObjectList *findScanByNo ( ListHeader *list, long scan_no, long order ); +extern void freeArr ( void ***ptr, long lines ); +extern void freeAllData ( SpecFile *sf ); +extern long mulstrtod ( char *str, double **arr, int *error ); +extern int sfGetHeaderLine ( SpecFile *sf, int from, char character, + char **buf,int *error); + +#endif /* SPECFILE_P_H */ diff --git a/src/silx/io/specfile/include/locale_management.h b/src/silx/io/specfile/include/locale_management.h new file mode 100644 
index 0000000..64562c5 --- /dev/null +++ b/src/silx/io/specfile/include/locale_management.h @@ -0,0 +1,28 @@ +#/*########################################################################## +# Copyright (C) 2012-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +#############################################################################*/ +#ifndef PyMca_LOCALE_MANAGEMENT_H +#define PyMca_LOCALE_MANAGEMENT_H + +double PyMcaAtof(const char*); + +#endif diff --git a/src/silx/io/specfile/src/locale_management.c b/src/silx/io/specfile/src/locale_management.c new file mode 100644 index 0000000..0c5f7ca --- /dev/null +++ b/src/silx/io/specfile/src/locale_management.c @@ -0,0 +1,79 @@ +# /*########################################################################## +# Copyright (C) 2012-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ +#include <locale_management.h> +#include <stdlib.h> + +#ifdef _GNU_SOURCE +# include <locale.h> +# ifdef __GLIBC__ +# include <features.h> +# if !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25))) +# /* strtod_l has been moved to stdlib.h since glibc 2.26 */ +# include <xlocale.h> +# endif +# else +# include <xlocale.h> +# endif +#else +# ifdef PYMCA_POSIX +# else +# ifdef SPECFILE_POSIX +# include <locale.h> +# ifndef LOCALE_NAME_MAX_LENGTH +# define LOCALE_NAME_MAX_LENGTH 85 +# endif +# endif +# endif +#endif + +#include <string.h> + +double PyMcaAtof(const char * inputString) +{ +#ifdef _GNU_SOURCE + double result; + locale_t newLocale; + newLocale = newlocale(LC_NUMERIC_MASK, "C", NULL); + result = strtod_l(inputString, NULL, newLocale); + freelocale(newLocale); + return result; +#else +#ifdef PYMCA_POSIX + return atof(inputString); +#else +#ifdef SPECFILE_POSIX + char *currentLocaleBuffer; + char localeBuffer[LOCALE_NAME_MAX_LENGTH + 1] = {'\0'}; + double result; + currentLocaleBuffer = setlocale(LC_NUMERIC, NULL); + strcpy(localeBuffer, currentLocaleBuffer); + setlocale(LC_NUMERIC, "C\0"); + result = atof(inputString); + setlocale(LC_NUMERIC, localeBuffer); + return(result); +#else + return atof(inputString); +#endif +#endif +#endif +} diff --git a/src/silx/io/specfile/src/sfdata.c b/src/silx/io/specfile/src/sfdata.c new file mode 100644 index 0000000..689f56d --- /dev/null +++ b/src/silx/io/specfile/src/sfdata.c @@ -0,0 +1,757 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +/************************************************************************ + * + * File: sfdata.c + * + * Project: SpecFile library + * + * Description: Functions for getting data + * + * Author: V.Rey + * + * Date: $Date: 2005/07/04 15:02:38 $ + * + ************************************************************************/ +/* + * Log: $Log: sfdata.c,v $ + * Log: Revision 1.8 2005/07/04 15:02:38 ahoms + * Log: Fixed memory leak in SfNoDataLines + * Log: + * Log: Revision 1.7 2004/01/20 09:23:50 sole + * Log: Small change in sfdata (ptr < (to-1)) changed to (ptr <= (to-1)) + * Log: + * Log: Revision 1.6 2003/03/06 16:56:40 sole + * Log: Check if to is beyond the scan size in SfData (still not finished but it seems to solve a crash) + * Log: + * Log: Revision 1.5 2002/12/09 13:04:05 sole + * Log: Added a check in SfNoDataLines + * Log: + * Log: Revision 1.4 2002/11/13 15:02:38 sole + * Log: Removed some printing in SfData + * Log: + * Log: Revision 1.3 2002/11/12 16:22:07 sole + * Log: WARNING: Developing version - Improved MCA reading and reading properly the end of the file. 
+ * Log: + * Log: Revision 1.2 2002/11/12 13:15:52 sole + * Log: 1st version from Armando. The idea behind is to take the last line only if it ends with \n + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Revision 2.1 2000/07/31 19:05:11 19:05:11 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. Complete rewrite + * Adds support for MCA + * + */ +#include <SpecFile.h> +#include <SpecFileP.h> +#include <locale_management.h> + +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX +#include <locale.h> +#endif +#endif + +#include <ctype.h> +/* + * Define macro + */ +#define isnumber(this) ( isdigit(this) || this == '-' || this == '+' || this == '.' || this == 'E' || this == 'e') + +/* + * Mca continuation character + */ +#define MCA_CONT '\\' +#define D_INFO 3 + +/* + * Declarations + */ +DllExport long SfNoDataLines ( SpecFile *sf, long index, int *error ); +DllExport int SfData ( SpecFile *sf, long index, double ***retdata, + long **retinfo, int *error ); +DllExport long SfDataAsString ( SpecFile *sf, long index, + char ***data, int *error ); +DllExport long SfDataLine ( SpecFile *sf, long index, long line, + double **data_line, int *error ); +DllExport long SfDataCol ( SpecFile *sf, long index, long col, + double **data_col, int *error ); +DllExport long SfDataColByName( SpecFile *sf, long index, + char *label, double **data_col, int *error ); + + +/********************************************************************* + * Function: long SfNoDataLines( sf, index, error ) + * + * Description: Gets number of data lines in a scan + * + * Parameters: + * Input : (1) File pointer + * (2) Index + * Output: + * (3) error number + * Returns: + * Number of data lines , + * ( -1 ) => errors. 
+ * Possible errors: + * SF_ERR_SCAN_NOT_FOUND + * + *********************************************************************/ +DllExport long +SfNoDataLines( SpecFile *sf, long index, int *error ) +{ + long *dinfo = NULL; + double **data = NULL; + long nrlines = 0; + int ret, i; + + ret = SfData(sf,index,&data,&dinfo,error); + + if (ret == -1) { + return(-1); + } + if (dinfo == (long *) NULL){ + return(-1); + } + if (dinfo[ROW] < 0){ + printf("Negative number of points!\n"); + /*free(dinfo);*/ + return(-1); + } + + nrlines = dinfo[ROW]; + + /* now free all stuff that SfData allocated */ + for (i = 0; i < nrlines; i++) + free(data[i]); + free(data); + free(dinfo); + + return nrlines; +} + + + +/********************************************************************* + * Function: int SfData(sf, index, data, data_info, error) + * + * Description: Gets data. + * Parameters: + * Input : (1) File pointer + * (2) Index + * Output: + * (3) Data array + * (4) Data info : [0] => no_lines + * [1] => no_columns + * [2] = ( 0 ) => regular + * ( 1 ) => not regular ! 
+ * (5) error number + * Returns: + * ( 0 ) => OK + * ( -1 ) => errors occured + * Possible errors: + * SF_ERR_MEMORY_ALLOC + * SF_ERR_FILE_READ + * SF_ERR_SCAN_NOT_FOUND + * SF_ERR_LINE_NOT_FOUND + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +DllExport int +SfData( SpecFile *sf, long index, double ***retdata, long **retinfo, int *error ) +{ + long *dinfo = NULL; + double **data = NULL; + double *dataline = NULL; + long headersize; + + char *ptr, + *from, + *to; + + char strval[100]; + double val; + double valline[512]; + long cols, + maxcol=512; + long rows; + int i; +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX + char *currentLocaleBuffer; + char localeBuffer[21]; +#endif +#endif + + if (index <= 0 ){ + return(-1); + } + + if (sfSetCurrent(sf,index,error) == -1 ) + return(-1); + + + /* + * Copy if already there + */ + if (sf->data_info != (long *)NULL) { + dinfo = ( long * ) malloc ( sizeof(long) * D_INFO); + dinfo[ROW] = sf->data_info[ROW]; + dinfo[COL] = sf->data_info[COL]; + dinfo[REG] = sf->data_info[REG]; + data = ( double **) malloc ( sizeof(double *) * dinfo[ROW]); + for (i=0;i<dinfo[ROW];i++) { + data[i] = (double *)malloc (sizeof(double) * dinfo[COL]); + memcpy(data[i],sf->data[i],sizeof(double) * dinfo[COL]); + } + *retdata = data; + *retinfo = dinfo; + return(0); + } + /* + * else do the job + */ + + if ( ((SpecScan *)sf->current->contents)->data_offset == -1 ) { + *retdata = data; + *retinfo = dinfo; + return(-1); + } + + headersize = ((SpecScan *)sf->current->contents)->data_offset + - ((SpecScan *)sf->current->contents)->offset; + + from = sf->scanbuffer + headersize; + to = sf->scanbuffer + ((SpecScan *)sf->current->contents)->size; + if (to > sf->scanbuffer+sf->scansize){ + /* the -32 found "experimentaly" */ + ptr = sf->scanbuffer+sf->scansize - 32; + while (*ptr != '\n') ptr--; + to=ptr; + /*printf("I let it crash ...\n");*/ + } + i=0; + ptr = from; 
+ rows = -1; + cols = -1; + /* + * Alloc memory + */ + if ( (data = (double **) malloc (sizeof(double *)) ) == (double **)NULL) { + *error = SF_ERR_MEMORY_ALLOC; + return(-1); + } + + if ( (dinfo = (long *) malloc(sizeof(long) * D_INFO) ) == (long *)NULL) { + free(data); + *error = SF_ERR_MEMORY_ALLOC; + return(-1); + } + ptr = from; + dinfo[ROW] = dinfo[COL] = dinfo[REG] = 0; + +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX + currentLocaleBuffer = setlocale(LC_NUMERIC, NULL); + strcpy(localeBuffer, currentLocaleBuffer); + setlocale(LC_NUMERIC, "C\0"); +#endif +#endif + for ( ; ptr < to; ptr++) { + /* get a complete line */ + i=0; + cols=0; + /*I should be at the start of a line */ + while(*(ptr) != '\n'){ + if (*(ptr-1) == '\n'){ + /*I am at the start of a line */ + while(*ptr == '#'){ + if (ptr >= to) + break; + for (; ptr < to; ptr++){ + if (*ptr == '\n'){ + break; + } + }; + /* on exit is equal to newline */ + if (ptr < to) { + ptr++; + } + } + if (*ptr == '@') { + /* + * read all mca block: go while in buffer ( ptr < to - 1 ) + * and while a newline is preceded by a slash + */ + for ( ptr = ptr + 2; + (*ptr != '\n' || (*(ptr-1) == MCA_CONT)) && ptr < to ; + ptr++); + if (ptr >= to){ + break; + } + } + while(*ptr == '#'){ + if (ptr >= to) + break; + for (; ptr < to; ptr++){ + if (*ptr == '\n'){ + break; + } + }; + /* on exit is equal to newline */ + if (ptr < to) { + ptr++; + } + } + /* first characters of buffer + */ + while (*ptr == ' ' && ptr < to) ptr++; /* get rid of empty spaces */ + } + /* + * in the middle of a line + */ + if (*ptr == ' ' || *ptr == '\t' ) { + strval[i] = '\0'; + i = 0; + val = PyMcaAtof(strval); + valline[cols] = val; + cols++; + if (cols >= maxcol) return(-1); + while(*(ptr+1) == ' ' || *(ptr+1) == '\t') ptr++; + } else { + if isnumber(*ptr){ + strval[i] = *ptr; + i++; + } + } + if (ptr >= (to-1)){ + break; + } + ptr++; + } + if ((*(ptr)== '\n') && (i != 0)){ + strval[i] = '\0'; + val = PyMcaAtof(strval); + valline[cols] = val; + cols++; + 
if (cols >= maxcol) return(-1); + /*while(*(ptr+1) == ' ' || *(ptr+1) == '\t') ptr++;*/ + } + /*printf("%c",*ptr);*/ + /* diffract31 crash -> changed from i!=0 to i==0 */ + /*cols>0 necessary scan 59 of 31oct98 */ + if ((ptr < to) && (cols >0)) { + rows++; + /*cols++;*/ + if (cols >= maxcol) return(-1); + /* printf("Adding a new row, nrows = %ld, ncols= %ld\n",rows,cols);*/ + /*printf("info col = %d cols = %d\n", dinfo[COL], cols);*/ + if (dinfo[COL] != 0 && cols != dinfo[COL]) { + ; + /*diffract31 crash -> nextline uncommented */ + dinfo[REG] = 1; + } else { + dinfo[COL] = cols; + } + if(dinfo[COL]==cols){ + dataline = (double *)malloc(sizeof(double) * cols); + memcpy(dataline,valline,sizeof(double) * cols); + data = (double **) realloc ( data, sizeof(double) * (rows+1)); + data[rows] = dataline; + dinfo[ROW]=rows+1; + }else{ + printf("Error on scan %d line %d\n", (int) index, (int) (rows+1)); + /* just ignore the line instead of stopping there with a + break; */ + rows--; + } + } + } + +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX + setlocale(LC_NUMERIC, localeBuffer); +#endif +#endif + /* + * make a copy in specfile structure + */ + if ( dinfo[ROW] != 0 && dinfo[REG] == 0) { + if (sf->data_info != (long *)NULL){ + printf("I should not be here!/n"); + sf->data_info[ROW] = dinfo[ROW]; + sf->data_info[COL] = dinfo[COL]; + sf->data_info[REG] = dinfo[REG]; + for (i=0;i<dinfo[ROW];i++) { + sf->data[i]= (double *)realloc (sf->data[i],sizeof(double) * dinfo[COL]); + if (sf->data[i] == (double *) NULL){ + printf("Realloc problem"); + return (-1); + } + memcpy(sf->data[i],data[i],sizeof(double) * dinfo[COL]); + } + *retdata = data; + *retinfo = dinfo; + return(0); + }else{ + sf->data_info = ( long * ) malloc ( sizeof(long) * D_INFO); + sf->data_info[ROW] = dinfo[ROW]; + sf->data_info[COL] = dinfo[COL]; + sf->data_info[REG] = dinfo[REG]; + sf->data = ( double **) malloc ( sizeof(double *) * dinfo[ROW]); + if (sf->data == (double **) NULL){ + printf("malloc1 problem"); + return 
(-1); + } + for (i=0;i<dinfo[ROW];i++) { + sf->data[i] = (double *)malloc (sizeof(double) * dinfo[COL]); + if (sf->data[i] == (double *) NULL){ + printf("malloc2 problem"); + return (-1); + } + memcpy(sf->data[i],data[i],sizeof(double) * dinfo[COL]); + } + } + } else { + if (dinfo[REG] == 0) { + ; + /*printf("Not Freeing data:!\n");*/ + /* I can be in the case of an mca without scan points */ + /*free(data); + return(-1);*/ + } + } + *retinfo = dinfo; + *retdata = data; + return( 0 ); +} + + +DllExport long +SfDataCol ( SpecFile *sf, long index, long col, double **retdata, int *error ) +{ + double *datacol=NULL; + + long *dinfo = NULL; + double **data = NULL; + + long selection; + int i,ret; + + ret = SfData(sf,index,&data,&dinfo,error); + + if (ret == -1) { + *error = SF_ERR_COL_NOT_FOUND; + *retdata = datacol; + return(-1); + } + + if (col < 0) { + selection = dinfo[COL] + col; + } else { + selection = col - 1; + } +if (selection > dinfo[COL] - 1) { +selection=dinfo[COL] - 1; +} + if ( selection < 0 || selection > dinfo[COL] - 1) { + *error = SF_ERR_COL_NOT_FOUND; + if ( dinfo != (long *)NULL) { + freeArrNZ((void ***)&data,dinfo[ROW]); + } + free(dinfo); + return(-1); + } + + datacol = (double *) malloc( sizeof(double) * dinfo[ROW]); + if (datacol == (double *)NULL) { + *error = SF_ERR_MEMORY_ALLOC; + if ( dinfo != (long *)NULL) + freeArrNZ((void ***)&data,dinfo[ROW]); + free(dinfo); + return(-1); + } + + for (i=0;i<dinfo[ROW];i++) { + datacol[i] = data[i][selection]; + } + + ret = dinfo[ROW]; + + if ( dinfo != (long *)NULL) + freeArrNZ((void ***)&data,dinfo[ROW]); + free(dinfo); + + *retdata = datacol; + return(ret); +} + + +DllExport long +SfDataLine( SpecFile *sf, long index, long line, double **retdata, int *error ) +{ + double *datarow=NULL; + + long *dinfo = NULL; + double **data = NULL; + + long selection; + int ret; + + ret = SfData(sf,index,&data,&dinfo,error); + + if (ret == -1) { + *error = SF_ERR_LINE_NOT_FOUND; + *retdata = datarow; + return(-1); + } 
+ + if (line < 0) { + selection = dinfo[ROW] + line; + } else { + selection = line - 1; + } + + if ( selection < 0 || selection > dinfo[ROW] - 1) { + *error = SF_ERR_LINE_NOT_FOUND; + if ( dinfo != (long *)NULL) { + freeArrNZ((void ***)&data,dinfo[ROW]); + } + free(dinfo); + return(-1); + } + + datarow = (double *) malloc( sizeof(double) * dinfo[COL]); + if (datarow == (double *)NULL) { + *error = SF_ERR_MEMORY_ALLOC; + if ( dinfo != (long *)NULL) + freeArrNZ((void ***)&data,dinfo[ROW]); + free(dinfo); + return(-1); + } + + + memcpy(datarow,data[selection],sizeof(double) * dinfo[COL]); + + ret = dinfo[COL]; + + if ( dinfo != (long *)NULL) + freeArrNZ((void ***)&data,dinfo[ROW]); + free(dinfo); + + *retdata = datarow; + return(ret); +} + + +DllExport long +SfDataColByName( SpecFile *sf, long index, char *label, double **retdata, int *error ) +{ + + double *datacol; + + long *dinfo = NULL; + double **data = NULL; + + int i,ret; + + char **labels = NULL; + + long nb_lab, + idx; + + short tofree=0; + + if ( sfSetCurrent(sf,index,error) == -1) { + *retdata = (double *)NULL; + return(-1); + } + + if ( sf->no_labels != -1 ) { + nb_lab = sf->no_labels; + labels = sf->labels; + } else { + nb_lab = SfAllLabels(sf,index,&labels,error); + tofree = 1; + } + + if ( nb_lab == 0 || nb_lab == -1) { + *retdata = (double *)NULL; + return(-1); + } + + for (idx=0;idx<nb_lab;idx++) + if (!strcmp(label,labels[idx])) break; + + if ( idx == nb_lab ) { + if (tofree) freeArrNZ((void ***)&labels,nb_lab); + *error = SF_ERR_COL_NOT_FOUND; + *retdata = (double *)NULL; + return(-1); + } + + ret = SfData(sf,index,&data,&dinfo,error); + + if (ret == -1) { + *retdata = (double *)NULL; + return(-1); + } + + datacol = (double *) malloc( sizeof(double) * dinfo[ROW]); + if (datacol == (double *)NULL) { + *error = SF_ERR_MEMORY_ALLOC; + if ( dinfo != (long *)NULL) + freeArrNZ((void ***)&data,dinfo[ROW]); + free(dinfo); + *retdata = (double *)NULL; + return(-1); + } + + for (i=0;i<dinfo[ROW];i++) { + 
datacol[i] = data[i][idx]; + } + + ret = dinfo[ROW]; + + if ( dinfo != (long *)NULL) + freeArrNZ((void ***)&data,dinfo[ROW]); + free(dinfo); + + *retdata = datacol; + + return(ret); +} + + +DllExport long +SfDataAsString( SpecFile *sf, long index, char ***retdata, int *error ) +{ + char **data=NULL; + char oneline[300]; + + char *from, + *to, + *ptr, + *dataline; + + long headersize,rows; + int i; + + if (sfSetCurrent(sf,index,error) == -1 ) + return(-1); + + if ( ((SpecScan *)sf->current->contents)->data_offset == -1 ) { + *retdata = data; + return(-1); + } + + data = (char **) malloc (sizeof(char *)); + + headersize = ((SpecScan *)sf->current->contents)->data_offset + - ((SpecScan *)sf->current->contents)->offset; + + from = sf->scanbuffer + headersize; + to = sf->scanbuffer + ((SpecScan *)sf->current->contents)->size; + + rows = -1; + i = 0; + + /* + * first characters of buffer + */ + + ptr = from; + + if (isnumber(*ptr)) { + rows++; + oneline[i] = *ptr; + i++; + } else if (*ptr == '@') { + /* + * read all mca block: go while in buffer ( ptr < to - 1 ) + * and while a newline is preceded by a slash + */ + for ( ptr = ptr + 2; + (*(ptr+1) != '\n' || (*ptr == MCA_CONT)) && ptr < to - 1 ; + ptr++); + } + + /* + * continue + */ + ptr++; + + for ( ; ptr < to - 1; ptr++) { + /* + * check for lines and for mca + */ + if ( *(ptr-1) == '\n' ) { + + if ( i != 0 ) { + oneline[i-1] = '\0'; + i = 0; + + dataline = (char *)strdup(oneline); + data = (char **) realloc ( data, sizeof(char *) * (rows +1)); + data[rows] = dataline; + } + + if ( *ptr == '@') { /* Mca --> pass it all */ + for ( ptr = ptr + 2; + (*ptr != '\n' || (*(ptr-1) == MCA_CONT)) && ptr < to ; + ptr++); + } else if ( *ptr == '#') { /* Comment --> pass one line */ + for (ptr = ptr + 1; *ptr != '\n';ptr++); + } else if ( isnumber(*ptr) ) { + rows++; + oneline[i] = *ptr; + i++; + } + } else { + if (rows == -1) continue; + + oneline[i] = *ptr; + i++; + } + } + + /* + * last line + */ + + if (rows != -1 && i) { + 
oneline[i-1] = '\0'; + dataline = (char *)strdup(oneline); + data = (char **) realloc ( data, sizeof(char *) * (rows+1)); + data[rows] = dataline; + } + + *retdata = data; + return(rows+1); +} diff --git a/src/silx/io/specfile/src/sfheader.c b/src/silx/io/specfile/src/sfheader.c new file mode 100644 index 0000000..b669e33 --- /dev/null +++ b/src/silx/io/specfile/src/sfheader.c @@ -0,0 +1,792 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ +/************************************************************************ + * + * File: sfheader.c + * + * Project: SpecFile library + * + * Description: Functions to access file and scan headers + * + * Author: V.Rey + * + * Date: $Date: 2002/11/20 09:01:29 $ + * + ************************************************************************/ +/* + * Log: $Log: sfheader.c,v $ + * Log: Revision 1.3 2002/11/20 09:01:29 sole + * Log: Added free(line); in SfTitle + * Log: + * Log: Revision 1.2 2002/11/14 16:18:48 sole + * Log: stupid bug removed + * Log: + * Log: Revision 1.1 2002/11/14 15:25:39 sole + * Log: Initial revision + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Revision 2.1 2000/07/31 19:05:09 19:05:09 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. 
Complete rewrite + * Adds support for MCA + */ +#include <SpecFile.h> +#include <SpecFileP.h> + +/* + * Function Declaration + */ +DllExport char * SfCommand ( SpecFile *sf, long index, int *error ); +DllExport long SfNoColumns ( SpecFile *sf, long index, int *error ); +DllExport char * SfDate ( SpecFile *sf, long index, int *error ); +DllExport double * SfHKL ( SpecFile *sf, long index, int *error ); + +DllExport long SfEpoch ( SpecFile *sf, long index, int *error ); +DllExport char * SfUser ( SpecFile *sf, long index, int *error ); +DllExport char * SfTitle ( SpecFile *sf, long index, int *error ); +DllExport char * SfFileDate ( SpecFile *sf, long index, int *error ); +DllExport long SfNoHeaderBefore ( SpecFile *sf, long index, int *error ); +DllExport long SfGeometry ( SpecFile *sf, long index, + char ***lines, int *error); +DllExport long SfHeader ( SpecFile *sf, long index, char *string, + char ***lines, int *error); +DllExport long SfFileHeader ( SpecFile *sf, long index, char *string, + char ***lines, int *error); + +int sfGetHeaderLine ( SpecFile *sf, int from, char character, + char **buf,int *error); +/* + * Internal functions + */ +static char *sfFindWord ( char *line, char *word, int *error ); +static long sfFindLines ( char *from, char *to,char *string, + char ***lines,int *error); +static char *sfOneLine ( char *from, char *end, int *error); + + +/********************************************************************* + * Function: char *SfCommand( sf, index, error ) + * + * Description: Reads '#S' line ( without #S and scan number ). + * + * Parameters: + * Input : (1) File pointer + * (2) Index + * Output: + * (3) error number + * Returns: + * String pointer, + * NULL => errors. 
 *   Possible errors:
 *        SF_ERR_MEMORY_ALLOC
 *        SF_ERR_FILE_READ
 *        SF_ERR_SCAN_NOT_FOUND
 *        SF_ERR_LINE_NOT_FOUND
 *
 *   Remark:  The memory allocated should be freed by the application
 *
 *********************************************************************/
DllExport char *
SfCommand( SpecFile *sf, long index, int *error )
{
    char    *ret_line=NULL;
    long     cnt,start,length;
    char    *ptr;

    /*
     * Choose scan
     */
    if (sfSetCurrent(sf,index,error) == -1)
        return(ret_line);

    /* sf->scanbuffer starts at the scan's "#S" line; skip the first
     * 3 characters, then the scan-number token, then the separating
     * whitespace, so `start` lands on the command text itself. */
    cnt = 3;
    for ( ptr = sf->scanbuffer + cnt; *ptr != ' ' ; ptr++,cnt++);
    for ( ptr = sf->scanbuffer + cnt; *ptr == ' ' || *ptr == '\t'; ptr++,cnt++);

    start = cnt;
    /* measure up to (not including) the end of the line */
    for ( ptr = sf->scanbuffer + cnt; *ptr != '\n' ; ptr++,cnt++);

    length = cnt - start;

    /*
     * Return the rest: a freshly malloc'ed, NUL-terminated copy.
     */
    ret_line = (char *) malloc ( sizeof(char) * ( length + 1) );
    if (ret_line == (char *)NULL) {
        *error = SF_ERR_MEMORY_ALLOC;
        return(ret_line);
    }

    ptr = sf->scanbuffer + start;
    memcpy(ret_line,ptr,sizeof(char) * length );
    ret_line[length] = '\0';

    return( ret_line );
}


/*********************************************************************
 *   Function: long SfNoColumns( sf, index, error )
 *
 *   Description: Gets number of columns in a scan
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Index
 *        Output:
 *                (3) error number
 *   Returns:
 *            Number of scan columns.(From #N line !)
 *            ( -1 ) if errors occured.
 *   Possible errors:
 *            SF_ERR_MEMORY_ALLOC    | => readHeader()
 *            SF_ERR_LINE_NOT_FOUND
 *            SF_ERR_FILE_READ
 *            SF_ERR_SCAN_NOT_FOUND
 *
 *********************************************************************/
DllExport long
SfNoColumns( SpecFile *sf, long index, int *error )
{
    long     col = -1;
    char    *buf=NULL;

    if ( sfSetCurrent(sf,index,error) == -1)
        return(-1);

    /* read the scan's "#N" line and parse its leading integer */
    if ( sfGetHeaderLine( sf, FROM_SCAN, SF_COLUMNS, &buf, error) == -1)
        return(-1);
    col = atol( buf );
    free(buf);
    return( col );
}


/*********************************************************************
 *   Function: char *SfDate( sf, index, error )
 *
 *   Description: Gets date from scan header
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Index
 *        Output:
 *                (3) error number
 *   Returns:
 *            Date.(From #D line !),
 *            NULL => errors.
 *   Possible errors:
 *            SF_ERR_MEMORY_ALLOC    | => readHeader()
 *            SF_ERR_LINE_NOT_FOUND
 *            SF_ERR_FILE_READ
 *            SF_ERR_SCAN_NOT_FOUND
 *
 *   Remark:  The memory allocated should be freed by the application
 *
 *********************************************************************/
DllExport char *
SfDate(SpecFile *sf, long index, int *error )
{
    char    *line=NULL;

    if ( sfSetCurrent(sf,index,error) == -1 )
        return(line);

    /* any non-zero return (== -1 here) means the "#D" line is missing;
     * ownership of the returned buffer passes to the caller */
    if ( sfGetHeaderLine( sf, FROM_SCAN, SF_DATE, &line, error))
        return((char *)NULL);

    return( line );
}


/*********************************************************************
 *   Function: double *SfHKL( sf, index, error )
 *
 *   Description: Reads '#Q' line.
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Index
 *        Output:
 *                (3) error number
 *   Returns:
 *            Poiter to a 3x1 dbl. array( HKL[0]=HKL[H]=H_value,
 *                                        HKL[1]=HKL[K]=K_value,
 *                                        HKL[2]=HKL[L]=L_value.
 *            NULL => errors.
 *
 *   Possible errors:
 *            SF_ERR_LINE_EMPTY
 *            SF_ERR_FILE_READ
 *            SF_ERR_SCAN_NOT_FOUND
 *            SF_ERR_LINE_NOT_FOUND
 *            SF_ERR_MEMORY_ALLOC    | => mulstrtod()
 *
 *   Remark:  The memory allocated should be freed by the application
 *
 *********************************************************************/
DllExport double *
SfHKL( SpecFile *sf, long index, int *error )
{
    char    *line=NULL;
    double  *HKL = NULL;
    long     i;

    if ( sfSetCurrent(sf,index,error) == -1 )
        return((double *)NULL);

    if ( sfGetHeaderLine( sf, FROM_SCAN, SF_RECIP_SPACE, &line, error) == -1 )
        return((double *)NULL);

    /*
     * Convert into double: mulstrtod returns the number of values parsed
     * and allocates HKL; the raw line is no longer needed.
     */
    i = mulstrtod( line, &HKL, error );
    free(line);

    if ( i < 0)
        return( (double *)NULL );

    /* a "#Q" line must carry exactly the three H, K, L values */
    if ( i != 3 ) {
        *error = SF_ERR_LINE_EMPTY;
        free( HKL );
        return( (double *)NULL );
    }

    return( HKL );
}


/*********************************************************************
 *   Function: long SfEpoch( sf, index, error )
 *
 *   Description: Gets epoch from the last file header.
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Index
 *        Output:
 *                (3) error number
 *   Returns:
 *            Epoch.(From #E line !)
 *            ( -1 ) if errors occured.
 *   Possible errors:
 *            SF_ERR_MEMORY_ALLOC    | => readHeader()
 *            SF_ERR_LINE_NOT_FOUND
 *            SF_ERR_FILE_READ
 *            SF_ERR_HEADER_NOT_FOUND
 *            SF_ERR_SCAN_NOT_FOUND
 *
 *********************************************************************/
DllExport long
SfEpoch( SpecFile *sf, long index, int *error )
{
    char   *buf=NULL;
    long    epoch = -1;

    if ( sfSetCurrent(sf,index,error) == -1 )
        return(-1);

    /* "#E" comes from the file header (FROM_FILE), not the scan header */
    if ( sfGetHeaderLine(sf,FROM_FILE,SF_EPOCH,&buf,error) == -1 )
        return(-1);

    epoch = atol( buf );
    free(buf);

    return( epoch );
}


/*********************************************************************
 *   Function: char SfFileDate( sf, index, error )
 *
 *   Description: Gets date from the last file header
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Index
 *        Output:
 *                (3) error number
 *   Returns:
 *            Date.(From #D line !)
 *            NULL => errors.
 *
 *   Possible errors:
 *            SF_ERR_MEMORY_ALLOC    | => readHeader()
 *            SF_ERR_LINE_NOT_FOUND
 *            SF_ERR_LINE_EMPTY
 *            SF_ERR_FILE_READ
 *            SF_ERR_HEADER_NOT_FOUND
 *            SF_ERR_SCAN_NOT_FOUND
 *
 *********************************************************************/
DllExport char *
SfFileDate( SpecFile *sf, long index, int *error )
{
    char *date = NULL;

    if ( sfSetCurrent(sf,index,error) == -1 )
        return((char *)NULL);

    /* caller owns and must free the returned string */
    if ( sfGetHeaderLine(sf,FROM_FILE,SF_DATE,&date,error) == -1 )
        return((char *)NULL);

    return( date );
}


/*********************************************************************
 *   Function: long SfNoHeaderBefore( sf, index, error )
 *
 *   Description: Gets number of scan header lines before data.
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Scan index
 *        Output:
 *                (3) error number
 *   Returns:
 *            Number of scan header lines before data ,
 *            ( -1 ) => errors.
 *   Possible errors:
 *            SF_ERR_SCAN_NOT_FOUND
 *
 *********************************************************************/
DllExport long
SfNoHeaderBefore( SpecFile *sf, long index, int *error )
{
    if ( sfSetCurrent(sf,index,error) == -1 )
        return(-1);

    /*
     * Obsolete... give some reasonable!
     * (kept for ABI compatibility: always returns -1 once the scan exists)
     */
    return(-1);
}


/*********************************************************************
 *   Function: char *SfUser( sf, index, error )
 *
 *   Description: Gets spec user information from the last file header
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Index
 *        Output:
 *                (3) error number
 *   Returns:
 *            User.(From 1st #C line !)
 *   Possible errors:
 *            SF_ERR_MEMORY_ALLOC    ||=> findWordInLine()
 *            SF_ERR_LINE_NOT_FOUND  |
 *            SF_ERR_FILE_READ       |
 *            SF_ERR_SCAN_NOT_FOUND  | => getFirstFileC()
 *            SF_ERR_HEADER_NOT_FOUND|
 *            SF_ERR_USER_NOT_FOUND
 *
 *********************************************************************/
DllExport char *
SfUser( SpecFile *sf, long index, int *error )
{

    char   *line=NULL;
    char   *user;
    char    word[] = "User =";

    if (sfSetCurrent(sf,index,error) == -1)
        return((char *)NULL);

    if (sfGetHeaderLine( sf, FROM_FILE, SF_COMMENT, &line, error) == -1)
        return((char *)NULL);

    /*
     * Find user: look for the "User =" marker in the first #C line.
     * NOTE(review): `line` is not freed on the not-found path — looks like
     * a leak, but sfFindWord's allocation contract is not visible here;
     * confirm it returns a fresh string (not a pointer into `line`)
     * before changing this.
     */
    user = sfFindWord( line, word, error );

    if ( user == (char *) NULL) {
        *error = SF_ERR_USER_NOT_FOUND;
        return((char *)NULL);
    }

    free(line);
    return( user );
}


/*********************************************************************
 *   Function: long SfTitle( sf, index, error )
 *
 *   Description: Gets spec title information from the last file header
 *
 *   Parameters:
 *        Input : (1) File pointer
 *                (2) Index
 *        Output:
 *                (3) error number
 *   Returns:
 *            Title.(From 1st #C line !)
 *            NULL => errors.
+ * Possible errors: + * SF_ERR_LINE_EMPTY + * SF_ERR_MEMORY_ALLOC + * SF_ERR_LINE_NOT_FOUND | + * SF_ERR_FILE_READ | + * SF_ERR_SCAN_NOT_FOUND | => getFirstFileC() + * SF_ERR_HEADER_NOT_FOUND | + * + *********************************************************************/ +DllExport char * +SfTitle( SpecFile *sf, long index, int *error ) +{ + char *line=NULL; + char *title; + char *ptr; + long i; + + if (sfSetCurrent(sf,index,error) == -1) + return((char *)NULL); + + if (sfGetHeaderLine( sf, FROM_FILE, SF_COMMENT, &line, error) == -1) + return((char *)NULL); + + /* + * Get title.( first word ) + */ + ptr = line; + + for ( i=0,ptr=line ; *ptr!='\t' && *ptr!='\n' && *ptr!='\0' ; i++ ) { + if ( *ptr==' ' ) { + if ( *(++ptr)==' ' ) { + break; + } else ptr--; + } + ptr++; + } + + if ( i==0 ) { + *error = SF_ERR_LINE_EMPTY; + return( (char *)NULL ); + } + + title = (char *)malloc( sizeof(char) * ( i+1 ) ); + + if ( title == (char *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( title ); + } + + memcpy( title, line, sizeof(char) * i ); + /* Next line added by Armando, it may be wrong */ + free(line); + title[i] = '\0'; + + return( title ); +} + + +DllExport long +SfGeometry ( SpecFile *sf, long index, char ***lines, int *error) +{ + char string[] = " \0"; + + string[0] = SF_GEOMETRY; + + return(SfHeader(sf,index,string,lines,error)); +} + + +DllExport long +SfHeader ( SpecFile *sf, long index, char *string, char ***lines, int *error) +{ + char *headbuf, + *endheader; + + long nb_found; + + if (sfSetCurrent(sf,index,error) == -1) + return(-1); + + headbuf = sf->scanbuffer; + endheader = sf->scanbuffer + sf->scansize; + + nb_found = sfFindLines(headbuf, endheader,string, lines,error); + + if (nb_found == 0) { + return SfFileHeader(sf,index,string,lines,error); + } else { + return nb_found; + } +} + + + +DllExport long +SfFileHeader ( SpecFile *sf, long index, char *string, char ***lines, int *error) +{ + char *headbuf, + *endheader; + + if (sfSetCurrent(sf,index,error) 
== -1) + return(-1); + if (sf->filebuffersize > 0) + { + headbuf = sf->filebuffer; + endheader = sf->filebuffer + sf->filebuffersize; + + return(sfFindLines(headbuf,endheader,string,lines,error)); + } + else + { + return 0; + } +} + + +static long +sfFindLines(char *from,char *to,char *string,char ***ret,int *error) +{ + char **lines; + long found; + unsigned long j; + char *ptr; + short all=0; + + found = 0; + ptr = from; + + if ( string == (char *) NULL || strlen(string) == 0) + all = 1; + + /* + * Allocate memory for an array of strings + */ + if ( (lines = (char **)malloc( sizeof(char *) )) == (char **)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return ( -1 ); + } + + /* + * First line + */ + if ( ptr[0] == '#' ) { + if ( all ) { + lines = (char **) realloc ( lines, (found+1) * sizeof(char *) ); + lines[found] = sfOneLine(ptr,to,error); + found++; + } else if ( ptr[1] == string[0]) { + for ( j=0; j < strlen(string) && ptr+j< to;j++) + if ( ptr[j+1] != string[j]) break; + if ( j == strlen(string)) { + lines = (char **) realloc ( lines, (found+1) * sizeof(char *) ); + lines[found] = sfOneLine(ptr,to,error); + found++; + } + } + } + + /* + * The rest + */ + for ( ptr = from + 1;ptr < to - 1;ptr++) { + if ( *(ptr - 1) == '\n' && *ptr == '#' ) { + if ( all ) { + lines = (char **) realloc ( lines, (found+1) * sizeof(char *) ); + lines[found] = sfOneLine(ptr,to,error); + found++; + } else if ( *(ptr+1) == string[0]) { + for ( j=0; j < strlen(string) && (ptr + j) < to;j++) + if ( ptr[j+1] != string[j]) break; + if ( j == strlen(string)) { + lines = (char **) realloc ( lines, (found+1) * sizeof(char *) ); + lines[found] = sfOneLine(ptr,to,error); + found++; + } + } + } + } + + if (found) *ret = lines; + else free(lines); + + return(found); +} + + +/********************************************************************* + * Function: char *sfGetHeaderLine( SpecFile *sf, sf_char, end, error ) + * + * Description: Gets one '#sf_char' line. 
+ * + * Parameters: + * Input : (1) File pointer + * (2) sf_character + * (3) end ( where to stop the search ) + * Output: + * (4) error number + * Returns: + * Pointer to the line , + * NULL in case of errors. + * Possible errors: + * SF_ERR_MEMORY_ALLOC + * SF_ERR_FILE_READ | => findLine() + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +int +sfGetHeaderLine( SpecFile *sf, int from , char sf_char, char **buf, int *error) +{ + + char *ptr,*headbuf; + char *endheader; + int found; + + found = 0; + + if ( from == FROM_SCAN ) { + headbuf = sf->scanbuffer; + endheader = sf->scanbuffer + sf->scanheadersize; + } else if ( from == FROM_FILE ) { + if ( sf->filebuffersize == 0 ) { + *error = SF_ERR_LINE_NOT_FOUND; + return(-1); + } + headbuf = sf->filebuffer; + endheader = sf->filebuffer + sf->filebuffersize; + } else { + *error = SF_ERR_LINE_NOT_FOUND; + return(-1); + } + + if ( headbuf[0] == '#' && headbuf[1] == sf_char) { + found = 1; + ptr = headbuf; + } else { + for ( ptr = headbuf + 1;ptr < endheader - 1;ptr++) { + if ( *(ptr - 1) == '\n' && *ptr == '#' && *(ptr+1) == sf_char) { + found = 1; + break; + } + } + } + + if (!found) { + *error = SF_ERR_LINE_NOT_FOUND; + return(-1); + } + + /* + * Beginning of the thing after '#X ' + */ + ptr = ptr + 3; + + *buf = sfOneLine(ptr,endheader,error); + + return( 0 ); +} + +static char * +sfOneLine(char *from,char *end,int *error) +{ + static char linebuf[5000]; + + char *ptr,*buf; + long i; + + ptr = from; + + for(i=0;*ptr != '\n' && ptr < end;ptr++,i++) { + linebuf[i] = *ptr; + } + + linebuf[i]='\0'; + + buf = (char *) malloc ( i+1 ); + + if (buf == ( char * ) NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return((char *)NULL); + } + strcpy(buf,(char *)linebuf); + + return(buf); +} + + +/********************************************************************* + * Function: char *sfFindWord( line, word, error ) + * + * Description: 
Looks for 'word' in given line and returns a + * copy of the rest of the line after the found word . + * + * Parameters: + * Input : (1) Line pointer + * (2) Word pointer + * Output: + * (3) error number + * Returns: + * Rest of the line after word. + * NULL => not found. + * Possible errors: + * SF_ERR_MEMORY_ALLOC + * + *********************************************************************/ +static char * +sfFindWord( char *line, char *word, int *error ) +{ + char *ret; + + line = strstr( line, word ); + + if ( line == (char *)NULL ) { + return( line ); + } + + line += strlen( word ); + + /* + * Delete blanks. + */ + while ( *line == ' ' || *line == '\t' ) line++; + + /* + * Copy the rest. + */ + ret = (char *)malloc( sizeof(char) * ( 1 + strlen( line )) ); + + if ( ret == (char *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return(ret); + } + + memcpy( ret, line, sizeof(char) * ( 1 + strlen( line )) ); + + return( ret ); +} + diff --git a/src/silx/io/specfile/src/sfindex.c b/src/silx/io/specfile/src/sfindex.c new file mode 100644 index 0000000..320b086 --- /dev/null +++ b/src/silx/io/specfile/src/sfindex.c @@ -0,0 +1,556 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +/************************************************************************ + * + * File: sfindex.c + * + * Project: SpecFile library + * + * Description: functions for scan numbering + * + * Author: V.Rey + * + * Date: $Date: 2004/05/12 16:56:47 $ + * + ************************************************************************/ +/* + * Log: $Log: sfindex.c,v $ + * Log: Revision 1.2 2004/05/12 16:56:47 sole + * Log: Support for windows + * Log: + * Log: Revision 1.1 2003/03/06 16:59:05 sole + * Log: Initial revision + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Revision 2.1 2000/07/31 19:05:15 19:05:15 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. 
Complete rewrite + * Adds support for MCA + */ +/* + * File: sfindex.c + * + * Description: + * + * Project: + * + * Author: Vicente Rey Bakaikoa + * + * Date: March 2000 + */ +/* + * $Log: sfindex.c,v $ + * Revision 1.2 2004/05/12 16:56:47 sole + * Support for windows + * + * Revision 1.1 2003/03/06 16:59:05 sole + * Initial revision + * + * Revision 3.0 2000/12/20 14:17:19 rey + * Python version available + * + * Revision 2.1 2000/07/31 19:05:15 19:05:15 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:26:55 13:26:55 rey (Vicente Rey-Bakaikoa) + * New version of the library. Complete rewrite + * Adds support for MCA + * + */ + +#include <SpecFile.h> +#include <SpecFileP.h> +#ifdef WIN32 +#include <stdio.h> +#include <stdlib.h> +#else +#include <unistd.h> +#endif +#include <ctype.h> + +#define ON_COMMENT 0 +#define ON_ABO 1 +#define ON_RES 2 +/* + * Declarations + */ +DllExport long * SfList ( SpecFile *sf, int *error ); +DllExport long SfIndexes ( SpecFile *sf, long number, long **idxlist ); +DllExport long SfIndex ( SpecFile *sf, long number, long order ); +DllExport long SfCondList ( SpecFile *sf, long cond, long **scan_list, + int *error ); +DllExport long SfScanNo ( SpecFile *sf ); +DllExport int SfNumberOrder ( SpecFile *sf, long index, long *number, + long *order ); +DllExport long SfNumber ( SpecFile *sf, long index ); +DllExport long SfOrder ( SpecFile *sf, long index ); + +/* + * Internal Functions + */ +static int checkAborted( SpecFile *sf, ObjectList *ptr, int *error ); + + +/********************************************************************* + * Function: long *SfList( sf, error ) + * + * Description: Creates an array with all scan numbers. + * + * Parameters: + * Input : SpecFile pointer + * Returns: + * Array with scan numbers. + * NULL if errors occured. 
+ * Possible errors: + * SF_ERR_MEMORY_ALLOC + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +DllExport long * +SfList( SpecFile *sf, int *error ) +{ + register ObjectList *ptr; + long *scan_list; + long i = 0; + + scan_list = (long *)malloc( sizeof(long) * (sf->no_scans) ); + + if ( scan_list == (long *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( scan_list ); + } + + for ( ptr=sf->list.first ; ptr ; ptr=ptr->next ,i++) { + scan_list[i] = ( ((SpecScan *)(ptr->contents))->scan_no ); + } + /*printf("scanlist[%li] = %li\n",i-1,scan_list[i-1]);*/ + return( scan_list ); +} + + +/********************************************************************* + * Function: long SfIndexes( sf, number , idxlist) + * + * Description: Creates an array with all indexes with the same scan + * number. + * + * Parameters: + * Input : SpecFile pointer + * scan number + * Output : array with scan indexes + * Returns: + * Number of indexes found + * Possible errors: + * None possible + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +DllExport long +SfIndexes( SpecFile *sf, long number, long **idxlist ) +{ + ObjectList *ptr; + long i; + long *indexes; + long *arr; + + i = 0; + indexes = (long *)malloc(sf->no_scans * sizeof(long)); + + for (ptr = sf->list.first; ptr; ptr=ptr->next ) { + if ( number == ((SpecScan *)(ptr->contents))->scan_no) { + indexes[i] = ((SpecScan *)(ptr->contents))->index; + i++; + } + } + + if (i == 0) + arr = (long *) NULL; + else { + arr = (long *)malloc(sizeof(long) * i); + memcpy(arr,indexes,sizeof(long) * i); + } + + *idxlist = arr; + free(indexes); + return( i ); +} + + +/********************************************************************* + * Function: long SfIndex( sf, number, order ) + * + * Description: Gets scan index from scan number and order. 
+ * + * Parameters: + * Input : (1) Scan number + * (2) Scan order + * Returns: + * Index number. + * (-1) if not found. + * + *********************************************************************/ +DllExport long +SfIndex( SpecFile *sf, long number, long order ) +{ + ObjectList *ptr; + + ptr = findScanByNo( &(sf->list), number, order ); + if ( ptr != (ObjectList *)NULL ) + return( ((SpecScan *)(ptr->contents))->index ); + + return( -1 ); +} + + +/********************************************************************* + * Function: long SfCondList( sf, cond, scan_list, error ) + * + * Description: Creates an array with all scan numbers. + * + * Parameters: + * Input : (1) SpecFile pointer + * (2) Condition : 0 => not aborted scans ( NOT_ABORTED ) + * -1 => aborted scans ( ABORTED ) + * nn => more than 'nn' data lines + * Output: (3) Scan list + * (4) error code + * Returns: + * Number of found scans. + * ( -1 ) if errors occured. + * Possible errors: + * SF_ERR_MEMORY_ALLOC + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +DllExport long +SfCondList( SpecFile *sf, long cond, long **scan_list, int *error ) +{ + register ObjectList *ptr; + long *list; + long i = 0; + int retcheck; + long index; + + *scan_list = (long *)NULL; + + list = (long *)malloc( sizeof(long) * (sf->no_scans) ); + + if ( list == (long *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( -1 ); + } + + /* + * Aborted scans . 
+ */ + if ( cond < 0 ) { /* aborted scans */ + for ( ptr=sf->list.first ; ptr ; ptr=ptr->next ) { + + retcheck = checkAborted( sf, ptr, error ); + + if ( retcheck < 0 ) { + free( list ); + return( -1 ); + } else if ( retcheck > 0) { + list[i] = ( ((SpecScan *)(ptr->contents))->scan_no ); + i++; + } + } + } else if ( cond == 0 ) { /* not aborted scans */ + for ( ptr=sf->list.first ; ptr ; ptr=ptr->next ) { + + retcheck = checkAborted( sf, ptr, error ); + + if ( retcheck < 0 ) { + free( list ); + return( -1 ); + } else if ( retcheck == 0 ) { + list[i] = ( ((SpecScan *)(ptr->contents))->scan_no ); + i++; + } + } + } else { /* cond > 0 - more than n data_lines */ + for ( ptr=sf->list.first ; ptr ; ptr=ptr->next ) { + + index = ( ((SpecScan *)(ptr->contents))->index ); + if ( SfNoDataLines(sf,index,error) <= cond ) continue; + + list[i] = ( ((SpecScan *)(ptr->contents))->scan_no ); + i++; + } + } + + *scan_list = ( long * ) malloc ( i * sizeof(long)); + + if ( *scan_list == (long *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( -1 ); + } + + memcpy(*scan_list,list, i * sizeof(long)); + free(list); + + return( i ); +} + + +/********************************************************************* + * Function: long SfScanNo( sf ) + * + * Description: Gets number of scans. + * + * Parameters: + * Input :(1) SpecFile pointer + * Returns: + * Number of scans. + * + *********************************************************************/ +DllExport long +SfScanNo( SpecFile *sf ) +{ + return( sf->no_scans ); +} + + +/********************************************************************* + * Function: int SfNumberOrder( sf, index, number, order ) + * + * Description: Gets scan number and order from index. 
+ * + * Parameters: + * Input : + * (1) SpecFile pointer + * (2) Scan index + * Output: + * (3) Scan number + * (4) Scan order + * Returns: + * ( -1 ) => not found + * ( 0 ) => found + * + *********************************************************************/ +DllExport int +SfNumberOrder( SpecFile *sf, long index, long *number, long *order ) +{ + register ObjectList *list; + + *number = -1; + *order = -1; + + /* + * Find scan . + */ + list = findScanByIndex( &(sf->list), index ); + if ( list == (ObjectList *)NULL ) return( -1 ); + + *number = ((SpecScan *)list->contents)->scan_no; + *order = ((SpecScan *)list->contents)->order; + + return( 0 ); +} + + +/********************************************************************* + * Function: long SfNumber( sf, index ) + * + * Description: Gets scan number from index. + * + * Parameters: + * Input : (1) SpecFile pointer + * (2) Scan index + * Returns: + * Scan number. + * ( -1 ) => not found + * + *********************************************************************/ +DllExport long +SfNumber( SpecFile *sf, long index ) +{ + register ObjectList *list; + + /* + * Find scan . + */ + list = findScanByIndex( &(sf->list), index ); + if ( list == (ObjectList *)NULL ) return( -1 ); + + return( ((SpecScan *)list->contents)->scan_no ); +} + + +/********************************************************************* + * Function: long SfOrder( sf, index ) + * + * Description: Gets scan order from index. + * + * Parameters: + * Input : (1) SpecFile pointer + * (2) Scan index + * Returns: + * Scan order. + * ( -1 ) => not found + * + *********************************************************************/ +DllExport long +SfOrder( SpecFile *sf, long index ) +{ + register ObjectList *list; + + + /* + * Find scan . 
+ */ + list = findScanByIndex( &(sf->list), index ); + if ( list == (ObjectList *)NULL ) return( -1 ); + + return( ((SpecScan *)list->contents)->order ); +} + +/********************************************************************* + * Function: int checkAborted( sf, ptr, error ) + * + * Description: Checks if scan was aborted or not . + * + * Parameters: + * Input : (1) SpecScan pointer + * (2) Pointer to the scan + * Output: (3) Error number + * Returns: + * (-1 ) : error + * ( 0 ) : not aborted + * ( 1 ) : aborted + * Possible errors: + * SF_ERR_MEMORY_ALLOC | => readHeader() + * SF_ERR_FILE_READ + * + *********************************************************************/ +static int +checkAborted( SpecFile *sf, ObjectList *ptr, int *error ) +{ + long nbytes; + long data_lines,size,from; + SpecScan *scan; + char *buffer,*cptr,next; + int state=ON_COMMENT; + int aborted=0; + long index; + + scan = ptr->contents; + index = scan->index; + + data_lines = SfNoDataLines(sf,index,error); + + if ( scan->hdafter_offset == -1 && data_lines > 0) { + return(0); + } else if ( data_lines <= 0 ) { + /* + * maybe aborted on first point + * we have to all to know ( but no data anyway ) + */ + size = scan->size; + from = scan->offset; + } else { + size = scan->last - scan->hdafter_offset; + from = scan->hdafter_offset; + } + + lseek(sf->fd,from,SEEK_SET); + buffer = ( char * ) malloc (size); + nbytes = read(sf->fd,buffer,size); + + if (nbytes == -1 ) { + *error = SF_ERR_FILE_READ; + return(-1); + } + + if (buffer[0] == '#' && buffer[1] == 'C') { + state = ON_COMMENT; + } + + for ( cptr = buffer + 1; cptr < buffer + nbytes - 1; cptr++) { + /* + * Comment line + */ + if ( *cptr == '#' && *(cptr+1) == 'C' && *(cptr-1) == '\n') { + state = ON_COMMENT; + } + /* + * Check aborted + */ + if ( *(cptr-1) == 'a' && *cptr == 'b' && *(cptr+1) == 'o') { + if ( state == ON_COMMENT ) { + state = ON_ABO; + } + } + if ( *(cptr-1) == 'r' && *cptr == 't' && *(cptr+1) == 'e') { + if ( state == 
ON_ABO) { + aborted = 1; + } + } + /* + * Check resume line + */ + if ( *(cptr-1) == 'r' && *cptr == 'e' && *(cptr+1) == 's') { + if ( state == ON_COMMENT ) { + state = ON_RES; + } + } + if ( *(cptr-1) == 'u' && *cptr == 'm' && *(cptr+1) == 'e') { + if ( state == ON_RES) { + aborted = 0; + } + } + + /* + * If data line... aborted is aborted + */ + if ( *cptr == '\n' ) { + next = *(cptr+1); + if (isdigit(next) || next == '+' || next == '-' || next == '@') { + aborted = 0; + } + } + } + free(buffer); + return(aborted); + +/* + * To be implemented + * - return 0 = not aborted + * - return 1 = aborted + * - return -1 = error + * + * implementation: read whole scan + * - go to header after offset + * - read all till end of scan with size + * - search for a line with a) #C ( comment ) then "aborted" + */ + return( 0 ); +} diff --git a/src/silx/io/specfile/src/sfinit.c b/src/silx/io/specfile/src/sfinit.c new file mode 100644 index 0000000..ca2fa7f --- /dev/null +++ b/src/silx/io/specfile/src/sfinit.c @@ -0,0 +1,830 @@ +# /*########################################################################## +# Copyright (C) 1995-2018 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +/************************************************************************ + * + * File: sfinit.c + * + * Project: SpecFile library + * + * Description: Initialization routines ( open/update/close ) + * + * Author: V.Rey + * + * Date: $Date: 2005/05/25 13:01:32 $ + * + ************************************************************************/ +/* + * Log: $Log: sfinit.c,v $ + * Log: Revision 1.5 2005/05/25 13:01:32 sole + * Log: Back to revision 1.3 + * Log: + * Log: Revision 1.3 2004/05/12 16:57:32 sole + * Log: windows support + * Log: + * Log: Revision 1.2 2002/11/12 13:23:43 sole + * Log: Version with added support for the new sf->updating flag + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Log: Revision 2.2 2000/12/20 12:12:08 rey + * Log: bug corrected with SfAllMotors + * Log: + * Revision 2.1 2000/07/31 19:04:42 19:04:42 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. Complete rewrite + * Adds support for MCA + */ +/* + * File: sfinit.c + * + * Description: This file implements basic routines on SPEC datafiles + * SfOpen / SfClose / SfError + * + * SfUpdate is kept but it is obsolete + * + * Version: 2.0 + * + * Date: March 2000 + * + * Author: Vicente REY + * + * Copyright: E.S.R.F. 
European Synchrotron Radiation Facility (c) 2000 + */ +/* + * $Log: sfinit.c,v $ + * Revision 1.5 2005/05/25 13:01:32 sole + * Back to revision 1.3 + * + * Revision 1.3 2004/05/12 16:57:32 sole + * windows support + * + * Revision 1.2 2002/11/12 13:23:43 sole + * Version with added support for the new sf->updating flag + * + * Revision 3.0 2000/12/20 14:17:19 rey + * Python version available + * + * Revision 2.2 2000/12/20 12:12:08 rey + * bug corrected with SfAllMotors + * + * Revision 2.1 2000/07/31 19:04:42 19:04:42 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:27:19 13:27:19 rey (Vicente Rey-Bakaikoa) + * New version of the library. Complete rewrite + * Adds support for MCA + * + * + *********************************************************************/ +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <ctype.h> + +#ifdef WIN32 +#include <stdio.h> +#include <stdlib.h> +#else +#include <unistd.h> +#endif + +#include <SpecFile.h> +#include <SpecFileP.h> + +/* + * Defines + */ + +#define ANY 0 +#define NEWLINE 1 +#define COMMENT 2 + +#define SF_ISFX ".sfI" + +#define SF_INIT 0 +#define SF_READY 1 +#define SF_MODIFIED 2 + +/* + * Function declaration + */ + +DllExport SpecFile * SfOpen ( char *name,int *error); +DllExport SpecFile * SfOpen2 ( int fd, char *name,int *error); +DllExport int SfClose ( SpecFile *sf); +DllExport short SfUpdate ( SpecFile *sf, int *error); +DllExport char * SfError ( int error); + + +#ifdef linux +char SF_SIGNATURE[] = "Linux 2ruru Sf2.0"; +#else +char SF_SIGNATURE[] = "2ruru Sf2.0"; +#endif + +/* + * Internal functions + */ +static short statusEnd ( char c2, char c1); +static void sfStartBuffer ( SpecFile *sf, SfCursor *cursor, short status,char c0, char c1,int *error); +static void sfNewLine ( SpecFile *sf, SfCursor *cursor, char c0,char c1,int *error); +static void sfHeaderLine ( SpecFile *sf, SfCursor *cursor, char c,int 
*error); +static void sfNewBlock ( SpecFile *sf, SfCursor *cursor, short how,int *error); +static void sfSaveScan ( SpecFile *sf, SfCursor *cursor, int *error); +static void sfAssignScanNumbers (SpecFile *sf); +static void sfReadFile ( SpecFile *sf, SfCursor *cursor, int *error); +static void sfResumeRead ( SpecFile *sf, SfCursor *cursor, int *error); +#ifdef SPECFILE_USE_INDEX_FILE +static short sfOpenIndex ( SpecFile *sf, SfCursor *cursor, int *error); +static short sfReadIndex ( int sfi, SpecFile *sf, SfCursor *cursor, int *error); +static void sfWriteIndex ( SpecFile *sf, SfCursor *cursor, int *error); +#endif + +/* + * errors + */ +typedef struct _errors { + int code; + char *message; +} sf_errors ; + +static +sf_errors errors[]={ +{ SF_ERR_MEMORY_ALLOC , "Memory allocation error ( SpecFile )" }, +{ SF_ERR_FILE_OPEN , "File open error ( SpecFile )" }, +{ SF_ERR_FILE_CLOSE , "File close error ( SpecFile )" }, +{ SF_ERR_FILE_READ , "File read error ( SpecFile )" }, +{ SF_ERR_FILE_WRITE , "File write error ( SpecFile )" }, +{ SF_ERR_LINE_NOT_FOUND , "Line not found error ( SpecFile )" }, +{ SF_ERR_SCAN_NOT_FOUND , "Scan not found error ( SpecFile )" }, +{ SF_ERR_HEADER_NOT_FOUND , "Header not found error ( SpecFile )" }, +{ SF_ERR_LABEL_NOT_FOUND , "Label not found error ( SpecFile )" }, +{ SF_ERR_MOTOR_NOT_FOUND , "Motor not found error ( SpecFile )" }, +{ SF_ERR_POSITION_NOT_FOUND , "Position not found error ( SpecFile )" }, +{ SF_ERR_LINE_EMPTY , "Line empty or wrong data error ( SpecFile )"}, +{ SF_ERR_USER_NOT_FOUND , "User not found error ( SpecFile )" }, +{ SF_ERR_COL_NOT_FOUND , "Column not found error ( SpecFile )" }, +{ SF_ERR_MCA_NOT_FOUND , "Mca not found ( SpecFile )" }, +/* MUST be always the last one : */ +{ SF_ERR_NO_ERRORS , "OK ( SpecFile )" }, +}; + + + + + +/********************************************************************* + * Function: SpecFile *SfOpen( name, error) + * + * Description: Opens connection to Spec data file. 
+ * Creates index list in memory. + * + * Parameters: + * Input : + * (1) Filename + * Output: + * (2) error number + * Returns: + * SpecFile pointer. + * NULL if not successful. + * + * Possible errors: + * SF_ERR_FILE_OPEN + * SF_ERR_MEMORY_ALLOC + * + *********************************************************************/ + +DllExport SpecFile * +SfOpen(char *name, int *error) { + + int fd; + fd = open(name,SF_OPENFLAG); + return (SfOpen2(fd, name, error)); +} + + + +/********************************************************************* + * Function: SpecFile *SfOpen2( fd, name, error) + * + * Description: Opens connection to Spec data file. + * Creates index list in memory. + * + * Parameters: + * Input : + * (1) Integer file handle + * (2) Filename + * Output: + * (3) error number + * Returns: + * SpecFile pointer. + * NULL if not successful. + * + * Possible errors: + * SF_ERR_FILE_OPEN + * SF_ERR_MEMORY_ALLOC + * + *********************************************************************/ + +DllExport SpecFile * +SfOpen2(int fd, char *name,int *error) { + SpecFile *sf; + short idxret; + SfCursor cursor; + struct stat mystat; + + if ( fd == -1 ) { + *error = SF_ERR_FILE_OPEN; + return ( (SpecFile *) NULL ); + } + + /* + * Init specfile strucure + */ +#ifdef _WINDOWS + static HANDLE hglb; + hglb = GlobalAlloc(GPTR,sizeof(SpecFile)); + sf = (SpecFile * ) GlobalLock(hglb); +#else + sf = (SpecFile *) malloc ( sizeof(SpecFile )); +#endif + stat(name,&mystat); + + sf->fd = fd; + sf->m_time = mystat.st_mtime; + sf->sfname = (char *)strdup(name); + + sf->list.first = (ObjectList *)NULL; + sf->list.last = (ObjectList *)NULL; + sf->no_scans = 0; + sf->current = (ObjectList *)NULL; + sf->scanbuffer = (char *)NULL; + sf->scanheadersize = 0; + sf->filebuffer = (char *)NULL; + sf->filebuffersize = 0; + + sf->no_labels = -1; + sf->labels = (char **)NULL; + sf->no_motor_names = -1; + sf->motor_names = (char **)NULL; + sf->no_motor_pos = -1; + sf->motor_pos = (double *)NULL; + 
sf->data = (double **)NULL; + sf->data_info = (long *)NULL; + sf->updating = 0; + + /* + * Init cursor + */ + cursor.bytecnt = 0; + cursor.cursor = 0; + cursor.scanno = 0; + cursor.hdafoffset = -1; + cursor.dataoffset = -1; + cursor.mcaspectra = 0; + cursor.what = 0; + cursor.data = 0; + cursor.file_header = 0; + + +#ifdef SPECFILE_USE_INDEX_FILE + /* + * Check if index file + * open it and continue from there + */ + idxret = sfOpenIndex(sf,&cursor,error); +#else + idxret = SF_INIT; +#endif + + switch(idxret) { + case SF_MODIFIED: + sfResumeRead(sf,&cursor,error); + sfReadFile(sf,&cursor,error); + break; + + case SF_INIT: + sfReadFile(sf,&cursor,error); + break; + + case SF_READY: + break; + + default: + break; + } + + sf->cursor = cursor; + + /* + * Once is all done assign scan numbers and orders + */ + sfAssignScanNumbers(sf); + +#ifdef SPECFILE_USE_INDEX_FILE + if (idxret != SF_READY) sfWriteIndex(sf,&cursor,error); +#endif + return(sf); +} + + + + +/********************************************************************* + * + * Function: int SfClose( sf ) + * + * Description: Closes a file previously opened with SfOpen() + * and frees all memory . 
+ * Parameters: + * Input: + * File pointer + * Returns: + * 0 : close successful + * -1 : errors occured + * + *********************************************************************/ +DllExport int +SfClose( SpecFile *sf ) +{ + register ObjectList *ptr; + register ObjectList *prevptr; + + freeAllData(sf); + + for( ptr=sf->list.last ; ptr ; ptr=prevptr ) { + free( (SpecScan *)ptr->contents ); + prevptr = ptr->prev; + free( (ObjectList *)ptr ); + } + + free ((char *)sf->sfname); + if (sf->scanbuffer != NULL) + free ((char *)sf->scanbuffer); + + if (sf->filebuffer != NULL) + free ((char *)sf->filebuffer); + + if( close(sf->fd) ) { + return( -1 ) ; + } + + free ( sf ); + sf = (SpecFile *)NULL; + + return ( 0 ); +} + + +/********************************************************************* + * + * Function: short SfUpdate( sf, error ) + * + * Description: Updates connection to Spec data file . + * Appends to index list in memory. + * + * Parameters: + * Input : + * (1) sf (pointer to the index list in memory) + * Output: + * (2) error number + * Returns: + * ( 0 ) => Nothing done. + * ( 1 ) => File was updated + * + * Possible errors: + * SF_ERR_FILE_OPEN + * SF_ERR_MEMORY_ALLOC + * + *********************************************************************/ +DllExport short +SfUpdate ( SpecFile *sf, int *error ) +{ + struct stat mystat; + long mtime; + /*printf("In SfUpdate\n"); + __asm("int3");*/ + stat(sf->sfname,&mystat); + + mtime = mystat.st_mtime; + + if (sf->m_time != mtime) { + sfResumeRead (sf,&(sf->cursor),error); + sfReadFile (sf,&(sf->cursor),error); + + sf->m_time = mtime; + sfAssignScanNumbers(sf); +#ifdef SPECFILE_USE_INDEX_FILE + sfWriteIndex (sf,&(sf->cursor),error); +#endif + return(1); + }else{ + return(0); + } +} + + +/********************************************************************* + * + * Function: char *SfError( code ) + * + * Description: Returns the message associated with error 'code'. 
+ * + * Parameters: + * Input : error code + * + *********************************************************************/ +DllExport char * +SfError(int code ) { + int i; + + for ( i=0 ; errors[i].code!=0 ; i++ ) { + if ( errors[i].code == code ) break; + } + return( errors[i].message ); +} + + +static void +sfReadFile(SpecFile *sf,SfCursor *cursor,int *error) { + + int fd; + + char *buffer,*ptr; + + long size,bytesread; + + short status; + + fd = sf->fd; + + size = 1024*1024; + + + if ( (buffer = (char *) malloc(size)) == NULL ) { + /* + * Try smaller buffer + */ + size = 128 * 128; + if ( (buffer = (char *) malloc(size)) == NULL ) { + /* + * Uhmmm + */ + *error = SF_ERR_MEMORY_ALLOC; + free(sf->sfname); + free(sf); + sf = (SpecFile *)NULL; + return; + } + } + + status = NEWLINE; + while ((bytesread = read(fd,buffer,size)) > 0 ) { + + sfStartBuffer(sf,cursor,status,buffer[0],buffer[1],error); + + cursor->bytecnt++; + for (ptr=buffer+1;ptr < buffer + bytesread -1; ptr++,cursor->bytecnt++) { + if (*(ptr-1) == '\n' ) { + sfNewLine(sf,cursor,*ptr,*(ptr+1),error); + } + } + + cursor->bytecnt++; + status = statusEnd(buffer[bytesread-2],buffer[bytesread-1]); + } + + free(buffer); + + sf->no_scans = cursor->scanno; + if (sf->no_scans > 0) { + /* + * Save last + */ + sfSaveScan(sf,cursor,error); + } + return; + +} + + +static void +sfResumeRead ( SpecFile *sf, SfCursor *cursor, int *error) { + cursor->bytecnt = cursor->cursor; + cursor->what = 0; + cursor->hdafoffset = -1; + cursor->dataoffset = -1; + cursor->mcaspectra = 0; + cursor->data = 0; + cursor->scanno--; + sf->updating = 1; + lseek(sf->fd,cursor->bytecnt,SEEK_SET); + return; +} + + +#ifdef SPECFILE_USE_INDEX_FILE +static short +sfOpenIndex ( SpecFile *sf, SfCursor *cursor, int *error) { + char *idxname; + short namelength; + int sfi; + + namelength = strlen(sf->sfname) + strlen(SF_ISFX) + 1; + + idxname = (char *)malloc(sizeof(char) * namelength); + + sprintf(idxname,"%s%s",sf->sfname,SF_ISFX); + + if ((sfi = 
open(idxname,SF_OPENFLAG)) == -1) { + free(idxname); + return(SF_INIT); + } else { + free(idxname); + return(sfReadIndex(sfi,sf,cursor,error)); + } +} + + +static short +sfReadIndex ( int sfi, SpecFile *sf, SfCursor *cursor, int *error) { + SfCursor filecurs; + char buffer[200]; + long bytesread,i=0; + SpecScan scan; + short modif = 0; + long mtime; + + /* + * read signature + */ + bytesread = read(sfi,buffer,sizeof(SF_SIGNATURE)); + if (strcmp(buffer,SF_SIGNATURE) || bytesread == 0 ) { + return(SF_INIT); + } + + /* + * read cursor and specfile structure + */ + if ( read(sfi,&mtime, sizeof(long)) == 0) return(SF_INIT); + if ( read(sfi,&filecurs, sizeof(SfCursor)) == 0) return(SF_INIT); + + if (sf->m_time != mtime) modif = 1; + + while(read(sfi,&scan, sizeof(SpecScan))) { + addToList(&(sf->list), (void *)&scan, (long)sizeof(SpecScan)); + i++; + } + sf->no_scans = i; + + memcpy(cursor,&filecurs,sizeof(SfCursor)); + + if (modif) return(SF_MODIFIED); + + return(SF_READY); +} + + +static void +sfWriteIndex ( SpecFile *sf, SfCursor *cursor, int *error) { + + int fdi; + char *idxname; + short namelength; + ObjectList *obj; + long mtime; + + namelength = strlen(sf->sfname) + strlen(SF_ISFX) + 1; + + idxname = (char *)malloc(sizeof(char) * namelength); + + sprintf(idxname,"%s%s",sf->sfname,SF_ISFX); + + /* if ((fdi = open(idxname,SF_WRITEFLAG,SF_UMASK)) == -1) { */ + if ((fdi = open(idxname,O_CREAT | O_WRONLY,SF_UMASK)) == -1) { + printf(" - cannot open. 
Error: (%d)\n",errno); + free(idxname); + return; + } else { + mtime = sf->m_time; + write(fdi,SF_SIGNATURE,sizeof(SF_SIGNATURE)); + /* + * Swap bytes for linux + */ + write(fdi, (void *) &mtime, sizeof(long)); + write(fdi, (void *) cursor, sizeof(SfCursor)); + for( obj = sf->list.first; obj ; obj = obj->next) + write(fdi,(void *) obj->contents, sizeof(SpecScan)); + close(fdi); + free(idxname); + return; + } +} +#endif + + +/***************************************************************************** + * + * Function: static void sfStartBuffer() + * + * Description: start analyzing file buffer and takes into account the last + * bytes of previous reading as defined in variable status + * + *****************************************************************************/ +static void +sfStartBuffer(SpecFile *sf,SfCursor *cursor,short status,char c0,char c1,int *error) { + + if ( status == ANY ) { + return; + } else if ( status == NEWLINE ) { + sfNewLine(sf,cursor,c0,c1,error); + } else if ( status == COMMENT ) { + cursor->bytecnt--; + sfHeaderLine(sf,cursor,c0,error); + cursor->bytecnt++; + } + +} + + +/******************************************************************************* + * + * Function: static void statusEnd() + * + * Description: ends analysis of file buffer and returns a variable + * indicating staus ( last character is COMMENT,NEWLINE of ANY ) + * + *******************************************************************************/ +static short +statusEnd(char c2,char c1) { + + if (c2=='\n' && c1=='#') { + return(COMMENT); + } else if (c1=='\n') { + return(NEWLINE); + } else { + return(ANY); + } +} + + +static void +sfNewLine(SpecFile *sf,SfCursor *cursor,char c0,char c1,int *error) { + if (c0 == '#') { + sfHeaderLine(sf,cursor,c1,error); + } else if (c0 == '@') { + if ( cursor->data == 0 ) { + cursor->dataoffset = cursor->bytecnt; + cursor->data = 1; + } + cursor->mcaspectra++; + } else if ( isdigit(c0) || c0 == '-' || c0 == '+' || c0 == ' ' || c0 == 
'\t') { + if ( cursor->data == 0 ) { + cursor->dataoffset = cursor->bytecnt; + cursor->data = 1; + } + } +} + + +static void +sfHeaderLine(SpecFile *sf,SfCursor *cursor,char c,int *error) { + if ( c == 'S') { + sfNewBlock(sf,cursor,SCAN,error); + } else if ( c == 'F') { + sfNewBlock(sf,cursor,FILE_HEADER,error); + } else { + if (cursor->data && cursor->hdafoffset == -1 ) + cursor->hdafoffset = cursor->bytecnt; + } +} + + +static void +sfNewBlock(SpecFile *sf,SfCursor *cursor,short newblock,int *error) { + + /* + * Dispatch opened block + */ + if (cursor->what == SCAN) { + sfSaveScan(sf,cursor,error); + } else if ( cursor->what == FILE_HEADER) { + cursor->fileh_size = cursor->bytecnt - cursor->cursor + 1; + } + + /* + * Open new block + */ + if (newblock == SCAN) { + cursor->scanno++; + cursor->what = SCAN; + } else { + cursor->file_header = cursor->bytecnt; + } + cursor->what = newblock; + cursor->hdafoffset = -1; + cursor->dataoffset = -1; + cursor->mcaspectra = 0; + cursor->data = 0; + cursor->cursor = cursor->bytecnt; +} + + +static void +sfSaveScan(SpecFile *sf, SfCursor *cursor,int *error) { + SpecScan scan; + SpecScan *oldscan; + register ObjectList *ptr; + + + scan.index = cursor->scanno; + scan.offset = cursor->cursor; + scan.size = cursor->bytecnt - cursor->cursor; + scan.last = cursor->bytecnt - 1; + scan.data_offset = cursor->dataoffset; + scan.hdafter_offset = cursor->hdafoffset; + scan.mcaspectra = cursor->mcaspectra; + scan.file_header = cursor->file_header; + + if(sf->updating == 1){ + ptr = sf->list.last; + oldscan=(SpecScan *)(ptr->contents); + oldscan->index=scan.index; + oldscan->offset=scan.offset; + oldscan->size=scan.size; + oldscan->last=scan.last; + oldscan->data_offset=scan.data_offset; + oldscan->hdafter_offset=scan.hdafter_offset; + oldscan->mcaspectra=scan.mcaspectra; + oldscan->file_header=scan.file_header; + sf->updating=0; + }else{ + addToList( &(sf->list), (void *)&scan, (long) sizeof(SpecScan)); + } +} + + +static void 
+sfAssignScanNumbers(SpecFile *sf) {
+
+  int i;
+  char *ptr;
+  char buffer[50];
+  char buffer2[50];
+
+  register ObjectList *object,
+                      *object2;
+  SpecScan            *scan,
+                      *scan2;
+
+  /*
+   * For every indexed scan: re-read the start of its "#S" line from
+   * the file, parse the scan number, and compute 'order' as 1 plus the
+   * count of earlier scans carrying the same number (repeat scans).
+   */
+  for ( object = (sf->list).first; object; object=object->next) {
+     scan = (SpecScan *) object->contents;
+
+     lseek(sf->fd,scan->offset,SEEK_SET);
+     /* NOTE(review): read() return value unchecked; buffer may be
+      * partially filled on a short read. */
+     read(sf->fd,buffer,sizeof(buffer));
+     buffer[49] = '\0';
+
+     /* buffer+3 skips the "#S " prefix; copy digits up to first space. */
+     for ( ptr = buffer+3,i=0; *ptr != ' ';ptr++,i++) buffer2[i] = *ptr;
+
+     buffer2[i] = '\0';
+
+     scan->scan_no = atol(buffer2);
+     scan->order = 1;
+     /* Quadratic over the scan list; acceptable for typical file sizes. */
+     for ( object2 = (sf->list).first; object2 != object; object2=object2->next) {
+         scan2 = (SpecScan *) object2->contents;
+         if (scan2->scan_no == scan->scan_no) scan->order++;
+     }
+  }
+}
+
+/* Debug helper: dump the parse-cursor state to stdout. */
+void
+printCursor(SfCursor *cursor) {
+   printf("<Cursor>\n");
+   printf(" - Bytecnt: %ld\n",cursor->bytecnt);
+   printf(" - Cursor: %ld\n",cursor->cursor);
+   printf(" - Scanno: %ld\n",cursor->scanno);
+}
diff --git a/src/silx/io/specfile/src/sflabel.c b/src/silx/io/specfile/src/sflabel.c
new file mode 100644
index 0000000..61cbb3f
--- /dev/null
+++ b/src/silx/io/specfile/src/sflabel.c
@@ -0,0 +1,654 @@
+# /*##########################################################################
+# Copyright (C) 1995-2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +/************************************************************************ + * + * File: sflabel.c + * + * Project: SpecFile library + * + * Description: Access to labels and motors + * + * Author: V.Rey + * + * Date: $Date: 2003/02/03 13:15:35 $ + * + ************************************************************************/ +/* + * Log: + * $Log: sflabel.c,v $ + * Revision 1.3 2003/02/03 13:15:35 rey + * Small change in handling of empty spaces at the beginning of the label buffer + * + * Revision 1.2 2002/11/20 09:56:31 sole + * Some macros leave more than 1 space between #L and the first label. + * Routine modified to be able to deal with already collected data. + * The offending macro(s) should be re-written. + * + * Revision 1.1 2002/11/20 08:21:34 sole + * Initial revision + * + * Revision 3.0 2000/12/20 14:17:19 rey + * Python version available + * + * Revision 2.2 2000/12/20 12:12:08 rey + * bug corrected with SfAllMotors + * + * Revision 2.1 2000/07/31 19:05:10 19:05:10 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. 
Complete rewrite + * Adds support for MCA + */ +#include <SpecFile.h> +#include <SpecFileP.h> +#include <locale_management.h> +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX +#include <locale.h> +#endif +#endif + +/* + * Declarations + */ +DllExport char * SfLabel ( SpecFile *sf, long index, long column, + int *error ); +DllExport long SfAllLabels ( SpecFile *sf, long index, char ***labels, + int *error ); +DllExport char * SfMotor ( SpecFile *sf, long index, long number, + int *error ); +DllExport long SfAllMotors ( SpecFile *sf, long index, char ***names, + int *error ); +DllExport double SfMotorPos ( SpecFile *sf, long index, long number, + int *error ); +DllExport double SfMotorPosByName( SpecFile *sf, long index, char *name, + int *error ); +DllExport long SfAllMotorPos ( SpecFile *sf, long index, double **pos, + int *error ); + + +/********************************************************************* + * Function: char *SfLabel( sf, index, column, error ) + * + * Description: Reads one label. + * + * Parameters: + * Input : (1) SpecScan pointer + * (2) Scan index + * (3) Column number + * Output: (4) Error number + * Returns: + * Pointer to the label , + * or NULL if errors occured. 
+ *   Possible errors:
+ *		SF_ERR_MEMORY_ALLOC	| => getStrFromArr()
+ *		SF_ERR_LABEL_NOT_FOUND
+ *		SF_ERR_LINE_EMPTY	|
+ *		SF_ERR_LINE_NOT_FOUND	|
+ *		SF_ERR_SCAN_NOT_FOUND	| => SfAllLabels()
+ *		SF_ERR_FILE_READ	|
+ *
+ *   Remark:  The memory allocated should be freed by the application
+ *
+ *********************************************************************/
+DllExport char *
+SfLabel( SpecFile *sf, long index, long column, int *error )
+{
+
+     char **labels=NULL;
+     long no_labels;
+     char *label=NULL;
+     long selection;
+
+     if (sfSetCurrent(sf,index,error) == -1)
+          return((char *)NULL);
+
+     /* Use the cached label count when the scan's labels were already
+      * parsed; otherwise parse the #L line now (fills 'labels'). */
+     if (sf->no_labels != -1 ) {
+         no_labels = sf->no_labels;
+     } else {
+         no_labels = SfAllLabels(sf,index,&labels,error);
+     }
+
+     if (no_labels == 0 || no_labels == -1) return((char *)NULL);
+
+     /* Negative column counts from the end; positive is 1-based. */
+     if ( column < 0 ) {
+          selection = no_labels + column;
+     } else {
+          selection = column - 1;
+     }
+
+     if (selection < 0 || selection > no_labels - 1 ) {
+          *error = SF_ERR_COL_NOT_FOUND;
+          if (labels != (char **) NULL )
+              freeArrNZ((void ***)&labels,no_labels);
+          return((char *)NULL);
+     }
+
+     /* Duplicate the selected label so the caller owns the returned
+      * string; free the temporary array if one was allocated. */
+     if (labels != (char **)NULL) {
+         label = (char *)strdup(labels[selection]);
+         freeArrNZ((void ***)&labels,no_labels);
+     } else {
+         label = (char *) strdup(sf->labels[selection]);
+     }
+     return( label );
+}
+
+
+/*********************************************************************
+ *   Function:		long SfAllLabels( sf, index, labels, error )
+ *
+ *   Description:	Reads all labels in #L lines
+ *
+ *   Parameters:
+ *		Input :	(1) SpecScan pointer
+ *			(2) Scan index
+ *		Output:	(3) Labels
+ *			(4) Error number
+ *   Returns:
+ *		Number of labels
+ *		( -1 ) if error.
+ * Possible errors: + * SF_ERR_MEMORY_ALLOC ||=> cpyStrArr(),lines2words() + * SF_ERR_SCAN_NOT_FOUND | => SfHeader() + * SF_ERR_FILE_READ | + * SF_ERR_LINE_EMPTY + * SF_ERR_LINE_NOT_FOUND + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +DllExport long +SfAllLabels( SpecFile *sf, long index, char ***labels, int *error ) +{ + static char tmplab[40]; + + char **labarr; + char *onelabel; + + char *ptr, + *buf=NULL; + + long no_labels = 0; + short i; + + /* + * select scan + */ + if (sfSetCurrent(sf,index,error) == -1) { + *labels = NULL; + return(0); + } + + /* + * Do not do it if already done + */ + if (sf->labels != (char **)NULL ) { + labarr = (char **)malloc(sizeof(char *) * sf->no_labels); + for ( i=0;i<sf->no_labels;i++) + labarr[i] = (char *)strdup(sf->labels[i]); + *labels = labarr; + return(sf->no_labels); + } + + /* + * else.. + */ + if (sfGetHeaderLine(sf,FROM_SCAN,SF_LABEL,&buf,error) == -1) { + *labels = NULL; + return(0); + } + + if ( buf[0] == '\0') { + *labels = NULL; + return(0); + } + + if ( (labarr = (char **)malloc( sizeof(char *))) == (char **)NULL) { + *error = SF_ERR_MEMORY_ALLOC; + return(-1); + } + + no_labels = 0; + i = 0; + + /* + * avoid problem of having too many spaces at the beginning + * with bad written macros -> added check for empty string + * + * get rid of spaces at the beginning of the string buffer + */ + + ptr = buf; + while((ptr < buf + strlen(buf) -1) && (*ptr == ' ')) ptr++; + + for (i=0;ptr < buf + strlen(buf) -1;ptr++,i++) { + if (*ptr==' ' && *(ptr+1) == ' ') { /* two spaces delimits one label */ + tmplab[i] = '\0'; + + labarr = (char **)realloc( labarr, (no_labels+1) * sizeof(char *)); + onelabel = (char *) malloc (i+2); + strcpy(onelabel,tmplab); + labarr[no_labels] = onelabel; + + no_labels++; + i=-1; + for(;*(ptr+1) == ' ' && ptr < buf+strlen(buf)-1;ptr++); + } else { + tmplab[i] = *ptr; + } + } + + if (*ptr != ' ') { + 
tmplab[i] = *ptr; + i++; + } + tmplab[i] = '\0'; + + labarr = (char **)realloc( labarr, (no_labels+1) * sizeof(char *)); + onelabel = (char *) malloc (i+2); + strcpy(onelabel,tmplab); + labarr[no_labels] = onelabel; + + no_labels++; + + /* + * Save in specfile structure + */ + sf->no_labels = no_labels; + sf->labels = (char **) malloc( sizeof(char *) * no_labels); + for (i=0;i<no_labels;i++) + sf->labels[i] = (char *) strdup(labarr[i]); + + *labels = labarr; + return( no_labels ); +} + + +/********************************************************************* + * Function: long SfAllMotors( sf, index, names, error ) + * + * Description: Reads all motor names in #O lines (in file header) + * + * Parameters: + * Input : (1) SpecScan pointer + * (2) Scan index + * Output: (3) Names + * (4) Error number + * Returns: + * Number of found names + * ( -1 ) if errors. + * Possible errors: + * SF_ERR_SCAN_NOT_FOUND + * SF_ERR_LINE_NOT_FOUND + * SF_ERR_LINE_EMPTY + * SF_ERR_MEMORY_ALLOC || => cpyStrArr(),lines2words() + * SF_ERR_FILE_READ | + * SF_ERR_HEADER_NOT_FOUND | => SfFileHeader() + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +DllExport long +SfAllMotors( SpecFile *sf, long index, char ***names, int *error ) +{ + char **lines; + char *thisline, + *endline; + + char **motarr; + char *onemot; + + static char tmpmot[40]; + + char *ptr; + + long motct = 0; + long no_lines; + short i,j; + + /* + * go to scan + */ + if (sfSetCurrent(sf,index,error) == -1) { + *names = NULL; + return(0); + } + + /* + * if motor names for this scan have already been read + */ + if (sf->motor_names != (char **)NULL) { + motarr = (char **)malloc(sizeof(char *) * sf->no_motor_names); + for (i=0;i<sf->no_motor_names;i++) { + motarr[i] = (char *) strdup (sf->motor_names[i]); + } + *names = motarr; + return(sf->no_motor_names); + } + + /* + * else + */ + no_lines = SfHeader(sf, 
index,"O",&lines,error); + if (no_lines == -1 || no_lines == 0 ) { + *names = (char **) NULL; + return(-1); + } + + if ( (motarr = (char **)malloc( sizeof(char *))) == (char **)NULL) { + *error = SF_ERR_MEMORY_ALLOC; + return(-1); + } + + motct = 0; + + for (j=0;j<no_lines;j++) { + thisline = lines[j] + 4; + endline = thisline + strlen(thisline); + for(ptr=thisline;*ptr == ' ';ptr++); + for (i=0;ptr < endline -2;ptr++,i++) { + if (*ptr==' ' && *(ptr+1) == ' ') { + tmpmot[i] = '\0'; + + motarr = (char **)realloc( motarr, (motct+1) * sizeof(char *)); + onemot = (char *) malloc (i+2); + strcpy(onemot,tmpmot); + motarr[motct] = onemot; + + motct++; + i=-1; + for(;*(ptr+1) == ' ' && ptr < endline -1;ptr++); + } else { + tmpmot[i] = *ptr; + } + } + if (*ptr != ' ') { tmpmot[i] = *ptr; i++; } + ptr++; + if (*ptr != ' ') { tmpmot[i] = *ptr; i++; } + + tmpmot[i] = '\0'; + motarr = (char **)realloc( motarr, (motct+1) * sizeof(char *)); + + onemot = (char *) malloc (i+2); + strcpy(onemot,tmpmot); + motarr[motct] = onemot; + + motct++; + + } + + /* + * Save in specfile structure + */ + sf->no_motor_names = motct; + sf->motor_names = (char **)malloc(sizeof(char *) * motct); + for (i=0;i<motct;i++) { + sf->motor_names[i] = (char *)strdup(motarr[i]); + } + + *names = motarr; + return( motct ); + +} + + +DllExport char * +SfMotor( SpecFile *sf, long index, long motnum, int *error ) +{ + + char **motors=NULL; + long nb_mot; + char *motor=NULL; + long selection; + + /* + * go to scan + */ + if (sfSetCurrent(sf,index,error) == -1) { + return((char *)NULL); + } + + if ( sf->no_motor_names != -1 ) { + nb_mot = sf->no_motor_names; + } else { + nb_mot = SfAllMotors(sf,index,&motors,error); + } + + if (nb_mot == 0 || nb_mot == -1) return((char *)NULL); + + if ( motnum < 0 ) { + selection = nb_mot + motnum; + } else { + selection = motnum - 1; + } + + if (selection < 0 || selection > nb_mot - 1 ) { + *error = SF_ERR_COL_NOT_FOUND; + if (motors != (char **) NULL) + freeArrNZ((void 
***)&motors,nb_mot); + return((char *)NULL); + } + + if (motors != (char **) NULL) { + motor = (char *)strdup(motors[selection]); + freeArrNZ((void ***)&motors,nb_mot); + } else { + motor = (char *)strdup(sf->motor_names[selection]); + } + return( motor ); +} + + +DllExport long +SfAllMotorPos ( SpecFile *sf, long index, double **retpos, int *error ) +{ + char **lines; + char *thisline, + *endline; + + double *posarr; + + static double pos[300]; + static char posstr[40]; + + char *ptr; + + long motct = 0; + long no_lines; + short i,j; + +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX + char *currentLocaleBuffer; + char localeBuffer[21]; +#endif +#endif + + if (sfSetCurrent(sf,index,error) == -1) { + *retpos = (double *) NULL; + return(0); + } + + /* + * if motors position for this scan have already been read + */ + if (sf->motor_pos != (double *)NULL) { + posarr = (double *)malloc(sizeof(double) * sf->no_motor_pos); + for (i=0;i<sf->no_motor_pos;i++) { + posarr[i] = sf->motor_pos[i]; + } + *retpos = posarr; + return(sf->no_motor_pos); + } + + /* + * else + */ + no_lines = SfHeader(sf, index,"P",&lines,error); + + if (no_lines == -1 || no_lines == 0 ) { + *retpos = (double *) NULL; + return(-1); + } + + motct = 0; +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX + currentLocaleBuffer = setlocale(LC_NUMERIC, NULL); + strcpy(localeBuffer, currentLocaleBuffer); + setlocale(LC_NUMERIC, "C\0"); +#endif +#endif + for (j=0;j<no_lines;j++) { + thisline = lines[j] + 4; + endline = thisline + strlen(thisline); + for(ptr=thisline;*ptr == ' ';ptr++); + for (i=0;ptr < endline -1;ptr++,i++) { + if (*ptr==' ') { + posstr[i] = '\0'; + + pos[motct] = PyMcaAtof(posstr); + + motct++; + i=-1; + for(;*(ptr+1) == ' ' && ptr < endline -1;ptr++); + } else { + posstr[i] = *ptr; + } + } + if (*ptr != ' ') { + posstr[i] = *ptr; + i++; + } + posstr[i] = '\0'; + pos[motct] = PyMcaAtof(posstr); + + motct++; + + } + +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX + setlocale(LC_NUMERIC, localeBuffer); +#endif +#endif 
+ + /* + * Save in specfile structure + */ + sf->no_motor_pos = motct; + sf->motor_pos = (double *)malloc(sizeof(double) * motct); + memcpy(sf->motor_pos,pos,motct * sizeof(double)); + + /* + * and return + */ + posarr = (double *) malloc ( sizeof(double) * motct ) ; + memcpy(posarr,pos,motct * sizeof(double)); + + *retpos = posarr; + + return( motct ); +} + + +DllExport double +SfMotorPos( SpecFile *sf, long index, long motnum, int *error ) +{ + + double *motorpos=NULL; + long nb_mot; + double retpos; + long selection; + + if (sfSetCurrent(sf,index,error) == -1) + return(HUGE_VAL); + + if (sf->no_motor_pos != -1 ) { + nb_mot = sf->no_motor_pos; + } else { + nb_mot = SfAllMotorPos(sf,index,&motorpos,error); + } + + if (nb_mot == 0 || nb_mot == -1) return(HUGE_VAL); + + if ( motnum < 0 ) { + selection = nb_mot + motnum; + } else { + selection = motnum - 1; + } + + if (selection < 0 || selection > nb_mot - 1 ) { + *error = SF_ERR_COL_NOT_FOUND; + if (motorpos != (double *)NULL) + free(motorpos); + return(HUGE_VAL); + } + + if (motorpos != (double *)NULL) { + retpos = motorpos[selection]; + free(motorpos); + } else { + retpos = sf->motor_pos[selection]; + } + return( retpos ); +} + + +DllExport double +SfMotorPosByName( SpecFile *sf, long index, char *name, int *error ) +{ + char **motors=NULL; + + long nb_mot, + idx, + selection; + short tofree=0; + + if (sfSetCurrent(sf,index,error) == -1) + return(HUGE_VAL); + + if ( sf->no_motor_names != -1 ) { + nb_mot = sf->no_motor_names; + motors = sf->motor_names; + } else { + nb_mot = SfAllMotors(sf,index,&motors,error); + tofree=1; + } + + if (nb_mot == 0 || nb_mot == -1) return(HUGE_VAL); + + for (idx = 0;idx<nb_mot;idx++) { + if (!strcmp(name,motors[idx])) break; + } + + if (idx == nb_mot) { + if (tofree) freeArrNZ((void ***)&motors,nb_mot); + *error = SF_ERR_MOTOR_NOT_FOUND; + return(HUGE_VAL); + } + + selection = idx+1; + + return(SfMotorPos(sf,index,selection,error)); +} diff --git a/src/silx/io/specfile/src/sflists.c 
b/src/silx/io/specfile/src/sflists.c new file mode 100644 index 0000000..aca267f --- /dev/null +++ b/src/silx/io/specfile/src/sflists.c @@ -0,0 +1,189 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ +/************************************************************************ + * + * File: sflists.c + * + * Project: SpecFile library + * + * Description: Functions to handle lists + * + * Author: V.Rey + * + * Date: $Date: 2003/03/06 17:00:42 $ + * + ************************************************************************/ +/* + * Log: $Log: sflists.c,v $ + * Log: Revision 1.1 2003/03/06 17:00:42 sole + * Log: Initial revision + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Revision 2.1 2000/07/31 19:03:25 19:03:25 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. Complete rewrite + * Adds support for MCA + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <Lists.h> + +/* + * Function declaration + */ +ObjectList * findInList ( ListHeader *list, int (*proc)(void *,void *), void *value ); +long addToList ( ListHeader *list, void *object, long size ); +void unlinkFromList ( ListHeader *list, ObjectList *element ); + +static long linkToList ( ListHeader *list, void *object ); + + +/********************************************************************* + * Function: ObjectList *findInList( list, proc, value ) + * + * Description: Looks for an list element. + * + * Parameters: + * Input : (1) ListHeader pointer + * (2) Comp. procedure + * (3) value + * Returns: + * Pointer to the found element , + * NULL if not found . 
+ *
+ *********************************************************************/
+ObjectList *
+findInList( ListHeader *list, int (*proc)(void * , void *), void *value )
+{
+     register ObjectList *ptr;
+
+     /* Linear scan; 'proc' is a user comparator returning non-zero on
+      * a match between an element's contents and 'value'. */
+     for ( ptr=list->first ; ptr ; ptr=ptr->next ) {
+          if ( (*proc)(ptr->contents, value) ) {
+               return( ptr );
+          }
+     }
+     return (ObjectList *)NULL;
+}
+
+
+/*********************************************************************
+ *   Function:		int addToList( list, object, size )
+ *
+ *   Description:	Adds an element to the list.
+ *
+ *   Parameters:
+ *		Input :	(1) List pointer
+ *			(2) Pointer to the new element
+ *			(3) Size of the new element
+ *   Returns:
+ *		(  0 ) => OK
+ *		( -1 ) => error
+ *
+ *********************************************************************/
+long
+addToList( ListHeader *list, void *object, long size )
+{
+     void *newobj;
+
+     /* Deep-copy 'size' bytes of the object, so the caller keeps
+      * ownership of the original; the list owns the copy. */
+     if ( (newobj = (void *)malloc(size)) == (void *)NULL ) return( -1 );
+     memcpy(newobj, object, size);
+
+     return( linkToList( list, newobj ) );
+
+}
+
+
+/*********************************************************************
+ *   Function:		int linkToList( list, object )
+ *
+ *   Description:	Adds an element to the list.
+ *
+ *   Parameters:
+ *		Input:	(1) ListHeader pointer
+ *			(2) pointer to the new element
+ *   Returns:
+ *		(  0 ) => OK
+ *		( -1 ) => error
+ *
+ *********************************************************************/
+static long
+linkToList( ListHeader *list, void *object )
+{
+     ObjectList *newobj;
+
+
+     if ((newobj = (ObjectList *) malloc(sizeof(ObjectList))) ==
+                   (ObjectList *) NULL) return( -1 );
+
+     /* Append at the tail of the doubly-linked list. */
+     newobj->contents = object;
+     newobj->prev = list->last;
+     newobj->next = NULL;
+
+     if (list->first == (ObjectList *)NULL) {
+          list->first = newobj;
+     } else {
+          (list->last)->next = newobj;
+     }
+
+     list->last = newobj;
+     return( 0 );
+}
+
+
+/*********************************************************************
+ *   Function:		int unlinkFromList( list, element )
+ *
+ *   Description:	Removes an element from the list.
+ *
+ *   Parameters:
+ *		Input :	(1) List pointer
+ *			(2) Pointer to the element
+ *
+ *********************************************************************/
+void
+unlinkFromList( ListHeader *list, ObjectList *element )
+{
+
+     /* Relink neighbours around 'element', updating the list head/tail
+      * when the element is first or last; then free both the stored
+      * contents and the node itself. NULL element is a no-op. */
+     if ( element != (ObjectList *)NULL ) {
+          if ( element->next != (ObjectList *)NULL ) {
+               element->next->prev = element->prev;
+          }
+          else {
+               list->last = element->prev ;
+          }
+          if ( element->prev != (ObjectList *)NULL ) {
+               element->prev->next = element->next;
+          }
+          else {
+               list->first = element->next;
+          }
+          free( element->contents );
+          free( element );
+     }
+}
+
diff --git a/src/silx/io/specfile/src/sfmca.c b/src/silx/io/specfile/src/sfmca.c
new file mode 100644
index 0000000..ad13bae
--- /dev/null
+++ b/src/silx/io/specfile/src/sfmca.c
@@ -0,0 +1,341 @@
+# /*##########################################################################
+# Copyright (C) 2000-2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +/************************************************************************ + * + * File: sfmca.c + * + * Project: SpecFile library + * + * Description: Access to MCA spectra + * + * Author: V.Rey + * + * Date: $Date: 2002/11/15 16:25:44 $ + * + ************************************************************************/ +/* + * Log: $Log: sfmca.c,v $ + * Log: Revision 1.3 2002/11/15 16:25:44 sole + * Log: free(retline) replaced by freeArrNZ((void ***) &retline,nb_lines); to eliminate the memory leak when reading mca + * Log: + * Log: Revision 1.2 2002/11/15 10:44:36 sole + * Log: added free(retline) after call to SfHeader + * Log: + * Log: Revision 1.1 2002/11/15 10:17:38 sole + * Log: Initial revision + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Revision 2.1 2000/07/31 19:05:12 19:05:12 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. Complete rewrite + * Adds support for MCA + */ +#include <SpecFile.h> +#include <SpecFileP.h> +#include <locale_management.h> +#ifndef _GNU_SOURCE +#ifdef PYMCA_POSIX +#include <locale.h> +#endif +#endif + +#include <ctype.h> +#include <stdlib.h> +/* + * Define macro + */ +#define isnumber(this) ( isdigit(this) || this == '-' || this == '+' || this =='e' || this == 'E' || this == '.' 
) + +/* + * Mca continuation character + */ +#define MCA_CONT '\\' +#define D_INFO 3 + +/* + * Declarations + */ +DllExport long SfNoMca ( SpecFile *sf, long index, int *error ); +DllExport int SfGetMca ( SpecFile *sf, long index, long mcano, + double **retdata, int *error ); +DllExport long SfMcaCalib ( SpecFile *sf, long index, double **calib, + int *error ); + + +/********************************************************************* + * Function: long SfNoMca( sf, index, error ) + * + * Description: Gets number of mca spectra in a scan + * + * Parameters: + * Input : (1) File pointer + * (2) Index + * Output: + * (3) error number + * Returns: + * Number of data lines , + * ( -1 ) => errors. + * Possible errors: + * SF_ERR_SCAN_NOT_FOUND + * + *********************************************************************/ +DllExport long +SfNoMca( SpecFile *sf, long index, int *error ) +{ + + if (sfSetCurrent(sf,index,error) == -1 ) + return(-1); + + return( ((SpecScan *)sf->current->contents)->mcaspectra ); + +} + + +/********************************************************************* + * Function: int SfGetMca(sf, index, number, data, error) + * + * Description: Gets data. + * Parameters: + * Input : (1) File pointer + * (2) Index + * Output: + * (3) Data array + * (4) Data info : [0] => no_lines + * [1] => no_columns + * [2] = ( 0 ) => regular + * ( 1 ) => not regular ! 
/*********************************************************************
 * Function: int SfGetMca(sf, index, number, retdata, error)
 *
 * Description: Extracts one MCA spectrum (the `number`-th "@"-block)
 *              from the current scan buffer and returns it as a
 *              freshly allocated double array.
 *
 *              Static locals cache the last scan/position so that
 *              fetching spectra in increasing order resumes scanning
 *              where the previous call stopped instead of rescanning
 *              from the top of the data block ("old_fashion" path).
 *              NOTE(review): the static cache makes this function
 *              non-reentrant / not thread-safe — confirm callers are
 *              single-threaded.
 *
 * Parameters:
 *     Input : (1) File pointer
 *             (2) Scan index
 *               NOTE(review): `index` is never used — the function
 *               operates on sf->current and assumes sfSetCurrent()
 *               was already called (e.g. via SfNoMca); verify callers.
 *             (3) MCA spectrum number (1-based)
 *     Output:
 *             (4) Data array (malloc'ed; caller must free)
 *             (5) error number
 * Returns:
 *     Number of values read ( >= 0 ), or ( -1 ) on error.
 * Possible errors:
 *     SF_ERR_MEMORY_ALLOC
 *     SF_ERR_MCA_NOT_FOUND
 *
 *********************************************************************/
DllExport int
SfGetMca( SpecFile *sf, long index, long number, double **retdata, int *error )
{
    double *data = NULL;
    long headersize;
    int old_fashion;
    /* Cache of the previous call, enabling incremental forward scans. */
    static char* last_from = NULL;
    static char* last_pos = NULL;
    static long last_number = 0;
    long int scanno = 0;
    static long int last_scanno = 0;
    char *ptr,
         *from,
         *to;

    /* Accumulates the ASCII digits of one value before conversion.
       NOTE(review): writes to strval[] are not bounds-checked; a token
       longer than 99 characters would overflow this buffer. */
    char strval[100];
    double val;

    int i,spect_no=0;
    long vals;

    long blocks=1,
         initsize=1024;
#ifndef _GNU_SOURCE
#ifdef PYMCA_POSIX
    /* Saved locale so PyMcaAtof can run under LC_NUMERIC="C". */
    char *currentLocaleBuffer;
    char localeBuffer[21];
#endif
#endif

    /* Size of the scan header = distance from scan start to data start. */
    headersize = ((SpecScan *)sf->current->contents)->data_offset
               - ((SpecScan *)sf->current->contents)->offset;

    scanno = ((SpecScan *)sf->current->contents)->scan_no;

    /*
     * check that mca number is available
     */
    if (number < 1) {
        *error = SF_ERR_MCA_NOT_FOUND;
        *retdata = (double *)NULL;
        return(-1);
    }

    /*
     * Data region of the scan buffer: [from, to).
     */
    from = sf->scanbuffer + headersize;
    to = sf->scanbuffer + ((SpecScan *)sf->current->contents)->size;

    /* Decide whether the cached position from the previous call can be
       reused (same scan, same buffer, requesting a later spectrum). */
    old_fashion = 1;
    if (last_scanno == scanno)
    {
        if (last_from == from)
        {
            /* same scan as before */
            if (number > last_number)
            {
                spect_no = last_number;
                old_fashion = 0;
            }
        }
    }
    if (old_fashion)
    {
        /* Cache miss: restart the search from the top of the data. */
        last_scanno = scanno;
        last_from = from;
        spect_no = 0;
        last_pos = from;
    }
    /*
     * go and find the beginning of spectrum:
     * each spectrum starts with an '@' marker.
     */
    ptr = last_pos;

    if ( *ptr == '@' ) {
        spect_no++;
        ptr++;
        last_pos = ptr;
    }

    while ( spect_no != number && ptr < to ) {
        if (*ptr == '@') spect_no++;
        ptr++;
        last_pos = ptr;
    }
    ptr++;

    if ( spect_no != number ) {
        *error = SF_ERR_MCA_NOT_FOUND;
        *retdata = (double *)NULL;
        return(-1);
    }
    last_number = spect_no;
    /*
     * Calculate size and book memory
     */
    initsize = 2048;

    i = 0;
    vals = 0;

    /*
     * Alloc memory (grown in `initsize`-sized blocks below).
     */
    if ((data = (double *)malloc (sizeof(double) * initsize)) == (double *)NULL) {
        *error = SF_ERR_MEMORY_ALLOC;
        return(-1);
    }

    /*
     * Force "C" numeric locale so the decimal separator is '.'.
     */
#ifndef _GNU_SOURCE
#ifdef PYMCA_POSIX
    currentLocaleBuffer = setlocale(LC_NUMERIC, NULL);
    strcpy(localeBuffer, currentLocaleBuffer);
    setlocale(LC_NUMERIC, "C\0");
#endif
#endif
    /* Walk the spectrum until an unescaped newline (MCA_CONT '\\' marks
       a continuation line), flushing each whitespace-delimited token. */
    for ( ;(*(ptr+1) != '\n' || (*ptr == MCA_CONT)) && ptr < to - 1 ; ptr++)
    {
        if (*ptr == ' ' || *ptr == '\t' || *ptr == '\\' || *ptr == '\n') {
            if ( i ) {
                /* Grow the output array one block at a time. */
                if ( vals%initsize == 0 ) {
                    blocks++;
                    if ((data = (double *)realloc (data, sizeof(double) * blocks * initsize))
                            == (double *)NULL) {
                        *error = SF_ERR_MEMORY_ALLOC;
#ifndef _GNU_SOURCE
#ifdef PYMCA_POSIX
                        /* Restore locale before bailing out. */
                        setlocale(LC_NUMERIC, localeBuffer);
#endif
#endif
                        return(-1);
                    }

                }
                strval[i] = '\0';
                i = 0;
                val = PyMcaAtof(strval);
                data[vals] = val;
                vals++;
            }
        } else if (isnumber(*ptr)) {
            strval[i] = *ptr;
            i++;
        }
    }

    /* Flush the final token (loop stops one char before the newline). */
    if (isnumber(*ptr)) {
        strval[i] = *ptr;
        strval[i+1] = '\0';
        val = PyMcaAtof(strval);
        data[vals] = val;
        vals++;
    }
#ifndef _GNU_SOURCE
#ifdef PYMCA_POSIX
    setlocale(LC_NUMERIC, localeBuffer);
#endif
#endif

    *retdata = data;

    return( vals );
}


/*********************************************************************
 * Function: long SfMcaCalib( sf, index, calib, error )
 *
 * Description: Reads the "@CALIB" header line of a scan and returns
 *              the three calibration coefficients as a malloc'ed
 *              3-element double array (caller must free).
 *
 *              NOTE(review): `retline` returned by SfHeader() is never
 *              freed here — the CVS log above says it should be
 *              released with freeArrNZ(); likely memory leak.
 *              NOTE(review): the sscanf() result is unchecked, so
 *              val1..val3 may be used uninitialized on a malformed
 *              line; the malloc() result is also unchecked.
 *
 * Returns:
 *     ( 0 )  => OK
 *     ( -1 ) => no @CALIB line found (calib set to NULL)
 *
 *********************************************************************/
DllExport long
SfMcaCalib ( SpecFile *sf, long index, double **calib, int *error )
{

    long nb_lines;
    char **retline;
    char *strptr;

    double val1,val2,val3;

    double *retdata;

    nb_lines = SfHeader(sf,index,"@CALIB",&retline,error);

    if (nb_lines > 0) {
        /* Skip the "#@CALIB " prefix (8 characters). */
        strptr = retline[0] + 8;
        sscanf(strptr,"%lf %lf %lf",&val1,&val2,&val3);
    } else {
        *calib = (double *)NULL;
        return(-1);
    }

    retdata = (double *) malloc(sizeof(double) * 3 );
    retdata[0] = val1; retdata[1] = val2; retdata[2] = val3;

    *calib = retdata;
    return(0);
}
--git a/src/silx/io/specfile/src/sftools.c b/src/silx/io/specfile/src/sftools.c new file mode 100644 index 0000000..9b78b67 --- /dev/null +++ b/src/silx/io/specfile/src/sftools.c @@ -0,0 +1,554 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ +/************************************************************************ + * + * File: sftools.c + * + * Project: SpecFile library + * + * Description: General library tools + * + * Author: V.Rey + * + * Date: $Date: 2004/05/12 16:57:02 $ + * + ************************************************************************/ +/* + * Log: $Log: sftools.c,v $ + * Log: Revision 1.2 2004/05/12 16:57:02 sole + * Log: Windows support + * Log: + * Log: Revision 1.1 2003/09/12 10:34:11 sole + * Log: Initial revision + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Log: Revision 2.2 2000/12/20 12:12:08 rey + * Log: bug corrected with SfAllMotors + * Log: + * Revision 2.1 2000/07/31 19:05:07 19:05:07 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. 
#include <SpecFile.h>
#include <SpecFileP.h>

#ifdef WIN32
#include <stdio.h>
#include <stdlib.h>
#else
#include <unistd.h>
#endif

/*
 * Library Functions
 */
DllExport void freePtr ( void *ptr );
DllExport void freeArrNZ ( void ***ptr, long lines );
DllExport void SfShow (SpecFile *sf);
DllExport void SfShowScan (SpecFile *sf, long index);

/*
 * Function declaration
 */
void freeArr ( void ***ptr, long lines );

int sfSetCurrent ( SpecFile *sf, long index, int *error );
int sfSameFile ( SpecFile *sf, ObjectList *list );
int sfSameScan ( SpecFile *sf, long index );

int findIndex ( void *scan, void *number );
int findNoAndOr ( void *scan, void *number );
int findFirst ( void *scan, void *file_offset );
ObjectList *findScanByIndex ( ListHeader *list, long index );
ObjectList *findFirstInFile ( ListHeader *list, long file_offset );
ObjectList *findScanByNo ( ListHeader *list, long scan_no, long order );

long mulstrtod ( char *str, double **arr, int *error );
void freeAllData ( SpecFile *sf );

/*
 * Globals
 */


/*********************************************************************
 * Function: int sfSetCurrent( sf, index, error )
 *
 * Description: Makes the scan with the given index the current one:
 *              frees data cached for the previous scan, reads the
 *              whole scan (and, when the scan belongs to a different
 *              file header section, that file header too) into the
 *              SpecFile buffers, and updates sf->current.
 *
 * Parameters:
 *     Input : (1) SpecFile pointer
 *             (2) New scan index
 *     Output: (3) error number
 * Returns:
 *     ( 0 )  => scan was already current, nothing done
 *     ( 1 )  => scan loaded
 *     ( -1 ) => error
 * Possible errors:
 *     SF_ERR_SCAN_NOT_FOUND, SF_ERR_MEMORY_ALLOC, SF_ERR_FILE_READ
 *
 *********************************************************************/
int
sfSetCurrent( SpecFile *sf, long index,int *error )
{
    ObjectList *list,
               *flist;
    SpecScan *scan,
             *fscan;
    long nbytes;
    long fileheadsize,start;

    /*
     * If same scan nothing to do
     */
    if (sfSameScan(sf,index)) return(0);

    /*
     * It is a new scan. Free memory allocated for previous one.
     */
    freeAllData(sf);

    /*
     * Find scan
     */
    list = findScanByIndex(&(sf->list),index);

    if (list == (ObjectList *)NULL) {
        *error = SF_ERR_SCAN_NOT_FOUND;
        return(-1);
    }

    /*
     * Read full scan into buffer
     */
    scan = list->contents;

    if (sf->scanbuffer != ( char * ) NULL) free(sf->scanbuffer);

    sf->scanbuffer = ( char *) malloc(scan->size);

    if (sf->scanbuffer == (char *)NULL) {
        *error = SF_ERR_MEMORY_ALLOC;
        return(-1);
    }

    /* NOTE(review): a short read (nbytes < scan->size) is not detected
       here — only read() failure (-1) is; confirm scans always fit. */
    lseek(sf->fd,scan->offset,SEEK_SET);
    nbytes = read(sf->fd,sf->scanbuffer,scan->size);
    if ( nbytes == -1) {
        *error = SF_ERR_FILE_READ;
        return(-1);
    }
    /* Sanity check: every scan block must start with "#S". */
    if ( sf->scanbuffer[0] != '#' || sf->scanbuffer[1] != 'S') {
        *error = SF_ERR_FILE_READ;
        return(-1);
    }
    sf->scanheadersize = scan->data_offset - scan->offset;

    /*
     * if different file read fileheader also
     */
    if (!sfSameFile(sf,list)) {
        if (sf->filebuffer != ( char * ) NULL) free(sf->filebuffer);

        /* The file header runs from its offset to the first scan
           that follows it. */
        start = scan->file_header;
        flist = findFirstInFile(&(sf->list),scan->file_header);
        if (flist == (ObjectList *) NULL) {
            fileheadsize = 0;
            sf->filebuffersize = fileheadsize;
        }
        else
        {
            fscan = flist->contents;
            fileheadsize = fscan->offset - start;
        }

        if (fileheadsize > 0) {
            sf->filebuffer = ( char *) malloc(fileheadsize);
            if (sf->filebuffer == (char *)NULL) {
                *error = SF_ERR_MEMORY_ALLOC;
                return(-1);
            }
            lseek(sf->fd,start,SEEK_SET);
            nbytes = read(sf->fd,sf->filebuffer,fileheadsize);
            if ( nbytes == -1) {
                *error = SF_ERR_FILE_READ;
                return(-1);
            }
            sf->filebuffersize = fileheadsize;
        }
    }
    sf->scansize = scan->size;
    sf->current = list;

    return(1);
}
+ * Parameters: + * Input : (1) SpecFile pointer + * (2) New scan + * Returns: + * 1 - the same + * 0 - not the same + * + *********************************************************************/ +int +sfSameFile( SpecFile *sf, ObjectList *list ) +{ + if (sf->current) { + return ( ((SpecScan *)sf->current->contents)->file_header == + ((SpecScan *)list->contents)->file_header ); + } else return(0); +} + + +/********************************************************************* + * Function: int sfSameScan( sf, index ) + * + * Description: Checks if the current scan and + * the new scan are the same. + * Parameters: + * Input : (1) SpecFile pointer + * (2) New scan index + * Returns: + * 1 - the same + * 0 - not the same + * + *********************************************************************/ +int +sfSameScan( SpecFile *sf, long index ) +{ + if ( sf->current == (ObjectList *)NULL) return(0); + + return ( ((SpecScan *)sf->current->contents)->index == index ); +} + + +/********************************************************************* + * Function: freePtr( ptr ); + * + * Description: Frees memory pointed to by 'ptr'. + * + * Parameters: + * Input : (1) Pointer + * + *********************************************************************/ +void +freePtr( void *ptr ) +{ + free( ptr ); +} + + +/********************************************************************* + * Function: freeArrNZ( ptr, lines ); + * + * Description: Frees an array if 'lines' > zero. + * + * Parameters: + * Input : (1) Array pointer + * (2) No. of lines + * + *********************************************************************/ +void +freeArrNZ( void ***ptr, long lines ) +{ + if ( *ptr != (void **)NULL && lines > 0 ) { + for ( ; lines ; lines-- ) { + free( (*ptr)[lines-1] ); + } + free( *ptr ); + *ptr = ( void **)NULL ; + } +} + + +/********************************************************************* + * Function: freeArr( ptr, lines ); + * + * Description: Frees an array. 
+ * 'ptr' will be always freed !!! + * + * Parameters: + * Input : (1) Array pointer + * (2) No. of lines + * + *********************************************************************/ +void +freeArr( void ***ptr, long lines ) +{ + if ( *ptr != (void **)NULL ) { + if ( lines > 0 ) { + for ( ; lines ; lines-- ) { + free( (*ptr)[lines-1] ); + } + } + free( *ptr ); + *ptr = ( void **)NULL ; + } +} + + +/********************************************************************* + * Function: int findIndex( scan, number ) + * + * Description: Compares if number == scan index . + * + * Parameters: + * Input : (1) SpecScan pointer + * (2) number + * Returns: + * 0 : not found + * 1 : found + * + *********************************************************************/ +int +findIndex( void *scan, void *number ) +{ + return( ((SpecScan *)scan)->index == *(long *)number ); +} + + +/********************************************************************* + * Function: int findFirst( scan, file_offset ) + * + * Description: Compares if scan offset > file_offset + * + * Parameters: + * Input : (1) SpecScan pointer + * (2) number + * Returns: + * 0 : not found + * 1 : found + * + *********************************************************************/ +int +findFirst( void *scan, void *file_offset ) +{ + return( ((SpecScan *)scan)->offset > *(long *)file_offset ); +} + + +/********************************************************************* + * Function: int findNoAndOr( scan, number ) + * ( Number + * Order ) + * + * Description: Compares if number1 = scan number and + * number2 = scan order + * Parameters: + * Input: (1) SpecScan pointer + * (2) number[1] + * Returns: + * 0 : not found + * 1 : found + * + *********************************************************************/ +int +findNoAndOr( void *scan, void *number ) +{ + + long *n = (long *)number; + + return( ( ((SpecScan *)scan)->scan_no == *n++ ) && ( ((SpecScan *)scan)->order == *n )); +} + + 
+/********************************************************************* + * Function: ObjectList *findScanByIndex( list, index ) + * + * Description: Looks for a scan . + * + * Parameters: + * Input: (1) List pointer + * (2) scan index + * Returns: + * ObjectList pointer if found , + * NULL if not. + * + *********************************************************************/ +ObjectList * +findScanByIndex( ListHeader *list, long index ) +{ + return findInList( list, findIndex, (void *)&index ); +} + + +/********************************************************************* + * Function: ObjectList findScanByNo( list, scan_no, order ) + * + * Description: Looks for a scan . + * + * Parameters: + * Input: (1) List pointer + * (2) scan number + * (3) scan order + * Returns: + * ObjectList pointer if found , + * NULL if not. + * + *********************************************************************/ +ObjectList * +findScanByNo( ListHeader *list, long scan_no, long order ) +{ + long value[2]; + + value[0] = scan_no; + value[1] = order; + + return( findInList( (void *)list, findNoAndOr, (void *)value) ); +} + + + +/********************************************************************* + * Function: ObjectList *findFirstInFile( list, file_offset ) + * + * Description: Looks for a scan . + * + * Parameters: + * Input: (1) List pointer + * (2) scan index + * Returns: + * ObjectList pointer if found , + * NULL if not. + * + *********************************************************************/ +ObjectList * +findFirstInFile( ListHeader *list, long file_offset ) +{ + return findInList( list, findFirst, (void *)&file_offset ); +} + + +/********************************************************************* + * Function: long mulstrtod( str, arr, error ) + * + * Description: Converts string to data array.( double array ) + * + * Parameters: + * Input : (1) String + * + * Output: + * (2) Data array + * (3) error number + * Returns: + * Number of values. + * ( -1 ) in case of errors. 
+ * Possible errors: + * SF_ERR_MEMORY_ALLOC + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +long +mulstrtod( char *str, double **arr, int *error ) +{ + int count,q,i=0; + double *ret; + char *str2; + static double tmpret[200]; + + *arr = (double *)NULL; + + str2 = str; + + while( (q = sscanf(str2, "%lf%n", &(tmpret[i]), &count)) > 0 ) { + i++; + str2 += count; + } + str2++; + + if ( !i ) { + return( i ); + } + + ret = (double *)malloc( sizeof(double) * i ); + + if ( ret == (double *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( -1 ); + } + memcpy(ret, tmpret, i * sizeof(double) ); + + *arr = ret; + return( i ); +} + +void +freeAllData(SpecFile *sf) +{ + if (sf->motor_pos != (double *)NULL) { + free(sf->motor_pos); + sf->motor_pos = (double *)NULL; + sf->no_motor_pos = -1; + } + if (sf->motor_names != (char **)NULL) { + freeArrNZ((void ***)&(sf->motor_names),sf->no_motor_names); + sf->motor_names = (char **)NULL; + sf->no_motor_names = -1; + } + if (sf->labels != (char **)NULL) { + freeArrNZ((void ***)&(sf->labels),sf->no_labels); + sf->labels = (char **)NULL; + sf->no_labels = -1; + } + if (sf->data_info != (long *)NULL) { + freeArrNZ((void ***)&(sf->data),sf->data_info[ROW]); + free(sf->data_info); + sf->data = (double **)NULL; + sf->data_info = (long *)NULL; + } +} + +DllExport void +SfShow (SpecFile *sf) { + printf("<Showing Info> - specfile: %s\n",sf->sfname); + printf(" - no_scans: %ld\n",sf->no_scans); + printf(" - current: %ld\n",((SpecScan*)sf->current->contents)->scan_no); + printf(" Cursor:\n"); + printf(" - no_scans: %ld\n",sf->cursor.scanno); + printf(" - bytecnt: %ld\n",sf->cursor.bytecnt); +} + +DllExport void +SfShowScan (SpecFile *sf, long index) { + int error; + SpecScan *scan; + + printf("<Showing Info> - specfile: %s / idx %ld\n",sf->sfname,index); + + if (sfSetCurrent(sf,index,&error) == -1) { + printf("Cannot get scan index %ld\n",index); 
+ } + + scan = (SpecScan *) sf->current->contents; + + printf(" - index: %ld\n",scan->index); + printf(" - scan_no: %ld\n",scan->scan_no); + printf(" - offset: %ld\n",scan->offset); + printf(" - data_offset: %ld\n",scan->data_offset); +} diff --git a/src/silx/io/specfile/src/sfwrite.c b/src/silx/io/specfile/src/sfwrite.c new file mode 100644 index 0000000..c77f400 --- /dev/null +++ b/src/silx/io/specfile/src/sfwrite.c @@ -0,0 +1,592 @@ +# /*########################################################################## +# Copyright (C) 1995-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ +/************************************************************************ + * + * File: sfwrite.c + * + * Project: SpecFile library + * + * Description: Functions for scan output + * + * Author: V.Rey + * + * Date: $Date: 2003/09/12 13:20:35 $ + * + ************************************************************************/ +/* + * Log: $Log: sfwrite.c,v $ + * Log: Revision 1.1 2003/09/12 13:20:35 rey + * Log: Initial revision + * Log: + * Log: Revision 3.0 2000/12/20 14:17:19 rey + * Log: Python version available + * Log: + * Revision 2.1 2000/07/31 19:05:14 19:05:14 rey (Vicente Rey-Bakaikoa) + * SfUpdate and bug corrected in ReadIndex + * + * Revision 2.0 2000/04/13 13:28:54 13:28:54 rey (Vicente Rey-Bakaikoa) + * New version of the library. Complete rewrite + * Adds support for MCA + */ +#include <SpecFile.h> +#include <SpecFileP.h> +#ifndef WIN32 +#include <unistd.h> +#endif +/* + * Declarations + */ +DllExport SpecFileOut *SfoInit ( SpecFile *sf, int *error ); +DllExport void SfoClose ( SpecFileOut *sfo ); +DllExport long SfoSelectAll ( SpecFileOut *sfo, int *error ); +DllExport long SfoSelectOne ( SpecFileOut *sfo, long index, + int *error ); +DllExport long SfoSelect ( SpecFileOut *sfo, long *list, + int *error ); +DllExport long SfoSelectRange ( SpecFileOut *sfo, long begin, + long end, int *error ); +DllExport long SfoRemoveOne ( SpecFileOut *sfo, long index, + int *error ); +DllExport long SfoRemove ( SpecFileOut *sfo, long *list, + int *error ); +DllExport long SfoRemoveRange ( SpecFileOut *sfo, long begin, + long end, int *error ); +DllExport long SfoRemoveAll ( SpecFileOut *sfo, int *error ); +DllExport long SfoWrite ( SpecFileOut *sfo, char *name, + int *error ); +DllExport long SfoGetList ( SpecFileOut *sfo, long **list, + int *error ); + +/* + * Internal functions + */ +static int sfoWriteOne(SpecFileOut *sfo,int output, long index,int *error); + + 
+/********************************************************************* + * Function: SpecFileOut *SfoInit( sf, error ) + * + * Description: Initializes a SpecFileOut structure: + * - pointer to SpecFile + * - list of scans to be copied + * - size of this list + * - last written file header + * Parameters: + * Input : (1) SpecFile pointer + * + * Output: + * (2) error number + * Returns: + * Pointer to the initialized SpecFileOut structure. + * NULL in case of an error. + * + * Possible errors: + * SF_ERR_MEMOREY_ALLOC + * + * Remark: This function MUST be the FIRST called before + * any other WRITE function is called ! + * + *********************************************************************/ +DllExport SpecFileOut * +SfoInit( SpecFile *sf, int *error ) +{ + SpecFileOut *sfo; + + /* + * Alloc memory + */ + sfo = (SpecFileOut *) malloc ( sizeof(SpecFileOut) ); + + if ( sfo == (SpecFileOut *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( (SpecFileOut *)NULL ); + } + + /* + * Initialize + */ + sfo->sf = sf; + sfo->list = (long *)NULL; + sfo->list_size = 0; + sfo->file_header = -1; + + return( sfo ); +} + + +/********************************************************************* + * Function: long SfoGetList( sfo, list, error ) + * + * Description: Makes a copy of the SpecFileOut list. + * + * Parameters: + * Input : (1) SpecFileOut pointer + * + * Output: (2) Copy of the output list of spec scan indices. + * (3) error code + * Returns: + * Number of scan indices in the output list , + * ( 0 ) => list empty( (long *)NULL ) ), no errors + * ( -1 ) in case of an error. 
+ * + * Possible errors: + * SF_ERR_MEMOREY_ALLOC + * + * Remark: The memory allocated should be freed by the application + * + *********************************************************************/ +DllExport long +SfoGetList( SpecFileOut *sfo, long **list, int *error ) +{ + long i; + + *list = (long *)NULL; + + if ( sfo->list_size > 0 ) { + *list = (long *)malloc( sfo->list_size * sizeof(long) ); + if ( *list == (long *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( -1 ); + } + for ( i=0 ; i < sfo->list_size ; i++ ) { + (*list)[i] = sfo->list[i]; + } + } else *list = (long *)NULL; + + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoSelectOne( sfo, index, error ) + * + * Description: Adds one scan index to the SpecFileOut list. + * + * Parameters: + * Input : (1) SpecFileOut pointer + * (2) Scan index + * Output: + * (3) error code + * Returns: + * ( -1 ) => error + * Number of scan indices in the SpecFileOut list. + * + * Possible errors: + * SF_ERR_MEMORY_ALLOC + * + *********************************************************************/ +DllExport long +SfoSelectOne( SpecFileOut *sfo, long index, int *error ) +{ + long i; + + /* + * Check if index exists or if it's out of range. + */ + if ( index > sfo->sf->no_scans || index < 1 ) { + return( sfo->list_size ); + } + + /* + * Alloc memory for the new index and add it to the list. + */ + if ( sfo->list == (long *)NULL ) { + sfo->list = (long *)malloc( sizeof(long) ); + if ( sfo->list == (long *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + return( -1 ); + } + sfo->list_size = 1; + } else { + /* + * Is the new index already in list ? 
+ */ + for ( i=0 ; i<sfo->list_size ; i++ ) + if ( index == sfo->list[i] ) return( sfo->list_size ); + sfo->list = realloc( sfo->list, ++(sfo->list_size) * sizeof(long) ); + if ( sfo->list == (long *)NULL ) { + *error = SF_ERR_MEMORY_ALLOC; + sfo->list_size = 0; + return( -1 ); + } + } + sfo->list[sfo->list_size-1] = index; + printf("Adding scan %ld\n",index); + + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoSelect( sfo, list, error ) + * + * Description: Adds several scan indices to the SpecFileOut list. + * + * Parameters: + * Input : (1) SpecFileOut pointer + * (2) List scan indices (!The last element + * MUST be a '0' !) + * Output: + * (3) error code + * Returns: + * ( -1 ) => error + * Number of scan indices in the SpecFileOut list. + * + * Possible errors: + * SF_ERR_MEMORY_ALLOC | => SfoSelectOne() + * + *********************************************************************/ +DllExport long +SfoSelect( SpecFileOut *sfo, long *list, int *error ) +{ + for ( ; *list != 0 ; list++ ) { + if ( SfoSelectOne( sfo, *list , error ) < 0 ) return( -1 ); + } + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoSelectRange( sfo, begin, end, error ) + * + * Description: Adds scan indices between 'begin' and 'end' + * to the SpecFileOut list. + * + * Parameters: + * Input : (1) SpecFileOut pointer + * (2) First ... + * (3) Last index to be added + * Output: + * (4) error code + * Returns: + * ( -1 ) => error + * Number of scan indices in the SpecFileOut list. 
+ * + * Possible errors: + * SF_ERR_MEMORY_ALLOC | => SfoSelectOne() + * + *********************************************************************/ +DllExport long +SfoSelectRange( SpecFileOut *sfo, long begin, long end, int *error ) +{ + long i; + + if ( begin > end ) { + i=begin; + begin = end; + end = i; + } + if ( begin < 1 || end > sfo->sf->no_scans ) { + return( sfo->list_size ); + } + for ( i=begin ; i<=end ; i++ ) { + if ( SfoSelectOne( sfo, i , error ) < 0 ) return( -1 ); + } + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoSelectAll( sfo, error ) + * + * Description: Writes all scan indices in the SpecFileOut list. + * + * Parameters: + * Input : (1) SpecFileOutput pointer + * Output: (2) error number + * Returns: + * ( -1 ) => error + * Number of scan indices in the SpecFileOut list. + * + * Possible errors: + * SF_ERR_MEMORY_ALLOC + * + *********************************************************************/ +DllExport long +SfoSelectAll( SpecFileOut *sfo, int *error ) +{ + long i; + + if ( sfo->sf->no_scans > 0 ) { + for ( i=1 ; i<=sfo->sf->no_scans ; i++ ) { + if ( SfoSelectOne( sfo, i , error ) < 0 ) return( -1 ); + } + } + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoRemoveOne( sfo, index, error ) + * + * Description: Removes one scan index from the SpecFileOut list. + * + * Parameters: + * Input : (1) SpecFileOut pointer + * (2) Scan index to be removed + * Output: + * (3) error code + * Returns: + * Number of scans left , + * ( 0 ) => list empty( (long *)NULL ) ), no errors + * ( -1 ) => error. + * + * Possible errors: + * SF_ERR_MEMORY_ALLOC + * + *********************************************************************/ +DllExport long +SfoRemoveOne( SpecFileOut *sfo, long index, int *error ) +{ + long i; + int found = 0; + + /* + * Look for scan index and delete. 
+ */ + for ( i=0 ; i < (sfo->list_size - found) ; i++ ) { + if ( sfo->list[i] == index ) found = 1; + if ( found ) sfo->list[i]=sfo->list[i+1]; + } + + /* + * Free unused memory + */ + if ( found ) { + (sfo->list_size)--; + sfo->list = realloc( sfo->list, sfo->list_size * sizeof(long) ); + if ( sfo->list == (long *)NULL && sfo->list_size != 0 ) { + *error = SF_ERR_MEMORY_ALLOC; + return( -1 ); + } + } + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoRemove( sfo, list, error ) + * + * Description: Removes several scans indices from the + * SpecFileOut list. + * + * Parameters: + * Input : (1) SpecFileOut pointer + * (2) List of scan indices to be removed + * ( !!! The last element MUST be a '0' !!! ) + * Output: + * (3) error code + * Returns: + * Number of scan indices left , + * ( 0 ) => list empty( (long *)NULL ) ), no errors + * ( -1 ) => error. + * + * Possible errors: + * SF_ERR_MEMORY_ALLOC | => SfoRemoveOne() + * + *********************************************************************/ +DllExport long +SfoRemove( SpecFileOut *sfo, long *list, int *error ) +{ + for ( ; *list != 0 ; list++ ) { + if ( SfoRemoveOne( sfo, *list , error ) < 0 ) return( -1 ); + } + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoRemoveRange( sfo, begin, end, error ) + * + * Description: Removes scans indices from 'begin' to 'end' + * from the SpecFileOut list. + * + * Parameters: + * Input : + * (1) SpecFileOut pointer + * (2) First ... + * (3) Last index to be removed + * Output: + * (4) error code + * Returns: + * Number of scan indices left , + * ( 0 ) => list empty( (long *)NULL ) ), no errors + * ( -1 ) => error. 
+ * + * Possible errors: + * SF_ERR_MEMORY_ALLOC | => SfoRemoveOne() + * + *********************************************************************/ +DllExport long +SfoRemoveRange( SpecFileOut *sfo, long begin, long end, int *error ) +{ + long i; + + if ( begin > end ) { + i=begin; + begin = end; + end = i; + } + if ( begin < 1 || end > sfo->sf->no_scans ) { + return( sfo->list_size ); + } + for ( i=begin ; i <= end ; i++ ) { + if ( SfoRemoveOne( sfo, i, error ) < 0 ) return( -1 ); + } + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: long SfoRemoveAll( sfo, error ) + * + * Description: Removes all scans indices + * from the SpecFileOut list. + * + * Parameters: + * Input : + * (1) SpecFileOut pointer + * Output: + * (2) error code + * Returns: + * ( 0 ) => OK + * + *********************************************************************/ +DllExport long +SfoRemoveAll( SpecFileOut *sfo, int *error ) +{ + free( sfo->list ); + sfo->list = (long *)NULL; + sfo->list_size = 0; + sfo->file_header = -1; + return( 0 ); +} + + +/********************************************************************* + * Function: int SfoWrite( sfo, name, error ) + * + * Description: Writes (appends) SpecScans specified in the sfo->list + * in the file 'name'. Related file headers are copied + * too. 
+ * Parameters: + * Input : (1) SpecFileOut pointer + * (2) Output file name + * Output: + * (3) error number + * Returns: + * Number of written scans, + * (-1 ) => Errors occured + * Possible errors: + * SF_ERR_FILE_WRITE | => cpyBlock() + * SF_ERR_FILE_READ + * SF_ERR_FILE_OPEN + * SF_ERR_FILE_CLOSE + * + *********************************************************************/ +DllExport long +SfoWrite( SpecFileOut *sfo, char *name, int *error ) +{ + int output; + long i; + + if ( sfo == (SpecFileOut *)NULL || sfo->list_size<1 ) return( 0 ); + + /* + * Open file + */ + if ( (output = open(name, O_CREAT | O_RDWR | O_APPEND, SF_UMASK )) == (int)NULL ) { + *error = SF_ERR_FILE_OPEN; + return( -1 ); + } + + for ( i=0 ; i < sfo->list_size ; i++ ) + sfoWriteOne(sfo,output,sfo->list[i],error); + + if ( close( output ) ) { + *error = SF_ERR_FILE_CLOSE; + return( -1 ); + } + + return( sfo->list_size ); +} + + +/********************************************************************* + * Function: int SfoClose( sfo ) + * + * Description: Frees all memory used by + * SpecFileOut structure. + * Parameters: + * Input : (1) SpecFileOut pointer + * + * Remark: This function should be called after all + * writing operations. + * + *********************************************************************/ +DllExport void +SfoClose( SpecFileOut *sfo ) +{ + /* + * Free memory. 
+ */ + free( sfo->list ); + free( sfo ); +} + + +static int +sfoWriteOne(SpecFileOut *sfo,int output,long index,int *error) +{ + long file_header,size; + SpecFile *sf; + + if ( sfSetCurrent(sfo->sf,index,error) == -1 ) { + *error = SF_ERR_SCAN_NOT_FOUND; + return(-1); + } + + /* + * File header + */ + sf = sfo->sf; + + file_header = ((SpecScan *)sf->current->contents)->size; + + if (file_header != -1 && file_header != sfo->file_header ) { + printf("Writing %ld bytes\n",sf->filebuffersize); + write(output, (void *) sf->filebuffer, sf->filebuffersize); + sfo->file_header = file_header; + } + + /* + * write scan + */ + size = ((SpecScan *)sf->current->contents)->size; + + if ( write(output,(void *) sf->scanbuffer,size) == -1 ) { + *error = SF_ERR_FILE_WRITE; + return(-1); + } + return(0); +} diff --git a/src/silx/io/specfile_wrapper.pxd b/src/silx/io/specfile_wrapper.pxd new file mode 100644 index 0000000..6770f7e --- /dev/null +++ b/src/silx/io/specfile_wrapper.pxd @@ -0,0 +1,77 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "17/03/2016" + +cimport cython + +cdef extern from "SpecFileCython.h": + struct _SpecFile: + pass +# Renaming struct because we have too many SpecFile items (files, classes…) +ctypedef _SpecFile SpecFileHandle + +cdef extern from "SpecFileCython.h": + # sfinit + SpecFileHandle* SfOpen(char*, int*) + int SfClose(SpecFileHandle*) + char* SfError(int) + + # sfindex + long* SfList(SpecFileHandle*, int*) + long SfScanNo(SpecFileHandle*) + long SfIndex(SpecFileHandle*, long, long) + long SfNumber(SpecFileHandle*, long) + long SfOrder(SpecFileHandle*, long) + + # sfdata + int SfData(SpecFileHandle*, long, double***, long**, int*) + long SfDataLine(SpecFileHandle*, long, long, double**, int*) + long SfDataColByName(SpecFileHandle*, long, char*, double**, int*) + + # sfheader + #char* SfTitle(SpecFileHandle*, long, int*) + long SfHeader(SpecFileHandle*, long, char*, char***, int*) + long SfFileHeader(SpecFileHandle*, long, char*, char***, int*) + char* SfCommand(SpecFileHandle*, long, int*) + long SfNoColumns(SpecFileHandle*, long, int*) + char* SfDate(SpecFileHandle*, long, int*) + + # sflabel + long SfAllLabels(SpecFileHandle*, long, char***, int*) + char* SfLabel(SpecFileHandle*, long, long, int *) + long SfAllMotors(SpecFileHandle*, long, char***, int*) + long SfAllMotorPos(SpecFileHandle*, long, double**, int*) + double SfMotorPosByName(SpecFileHandle*, long, char*, int*) + + # sftools + void freeArrNZ(void***, long) + + # sfmca + long SfNoMca(SpecFileHandle*, long, int*) + int SfGetMca(SpecFileHandle*, long, long , double**, int*) + long 
SfMcaCalib(SpecFileHandle*, long, double**, int*) + diff --git a/src/silx/io/specfilewrapper.py b/src/silx/io/specfilewrapper.py new file mode 100644 index 0000000..01e185c --- /dev/null +++ b/src/silx/io/specfilewrapper.py @@ -0,0 +1,371 @@ +# coding: utf-8 +# /*######################################################################### +# Copyright (C) 2016-2018 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""This module provides a backward compatibility layer with the legacy +specfile wrapper. + +If you are starting a new project, please consider using :mod:`silx.io.specfile` +instead of this module. 
+ +If you want to use this module for an existing project that used the old +wrapper through PyMca, you can try replacing:: + + from PyMca5.PyMcaIO import specfilewrapper + +with:: + + from silx.io import specfilewrapper + +There might still be differences between this module and the old +wrapper, due to differences in the underlying implementation. +Any of these differences that break your code should be reported on +https://github.com/silx-kit/silx/issues + +The documentation mentions only the methods and attributes that are different +from the ones in :class:`silx.io.specfile.SpecFile` and +:class:`silx.io.specfile.Scan`. You should refer to the documentation of these +base classes for more information. +""" +from silx.io.specfile import SpecFile, Scan + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "15/09/2016" + + +def _format_number_list(number_list): + """Return string representation of a list of integers, + using ``,`` as a separator and ``:`` as a range separator. + """ + ret = "" + first_in_range = number_list[0] + last_in_range = number_list[0] + previous = number_list[0] + for number in number_list[1:]: + if number - previous != 1: + # reached end of range + if last_in_range > first_in_range: + ret += "%d:%d," % (first_in_range, last_in_range) + # passed isolated number + else: + ret += "%d," % previous + # reinitialize range + first_in_range = number + last_in_range = number + else: + # still inside a continuous range + last_in_range = number + + previous = number + + # last number + if last_in_range > first_in_range: + ret += "%d:%d" % (first_in_range, last_in_range) + else: + ret += "%d" % previous + + return ret + + +class Specfile(SpecFile): + """ + This class is a subclass of :class:`silx.io.specfile.SpecFile`. 
+ + It redefines following methods: + + - :meth:`__getitem__`: returns a :class:`scandata` object instead of + a :class:`silx.io.specfile.Scan` object + - :meth:`list`: returns a string representation of a list instead of a + list of integers + + Following methods are added: + + - :meth:`select` + - :meth:`scanno` + - :meth:`allmotors` + - :meth:`epoch` + - :meth:`title` + """ + def __init__(self, filename): + SpecFile.__init__(self, filename) + + def __getitem__(self, key): + """Get scan by 0-based index + + :param key: 0-based scan index + :type key: int + + :return: Scan + :rtype: :class:`scandata` + """ + if not isinstance(key, int): + raise TypeError("Scan index must be an integer") + + scan_index = key + # allow negative index, like lists + if scan_index < 0: + scan_index += len(self) + + if not 0 <= scan_index < len(self): + msg = "Scan index must be in range 0-%d" % (len(self) - 1) + raise IndexError(msg) + + return scandata(self, scan_index) + + def list(self): + """Return a string representation of a list of scan numbers. + + The scans numbers are listed in the order in which they appear + in the file. Continuous ranges of scan numbers are represented + as ``first:last``. + + For instance, let's assume our specfile contains following scans: + *1, 2, 3, 4, 5, 684, 685, 687, 688, 689, 700, 688, 688*. + This method will then return:: + + "1:5,684:685,687:689,700,688,688" + """ + number_list = SpecFile.list(self) + return _format_number_list(number_list) + + def select(self, key): + """Get scan by ``n.m`` key + + :param key: ``"s.o"`` (scan number, scan order) + :type key: str + :return: Scan + :rtype: :class:`scandata` + """ + msg = "Key must be a string 'N.M' with N being the scan" + msg += " number and M the order (eg '2.3')." + + if not hasattr(key, "lower") or "." 
not in key: + raise TypeError(msg) + + try: + (number, order) = map(int, key.split(".")) + scan_index = self.index(number, order) + except (ValueError, IndexError): + # self.index can raise an index error + # int() can raise a value error + raise KeyError(msg + "\nValid keys: '" + + "', '".join(self.keys()) + "'") + except AttributeError: + # e.g. "AttrErr: 'float' object has no attribute 'split'" + raise TypeError(msg) + + if not 0 <= scan_index < len(self): + msg = "Scan index must be in range 0-%d" % (len(self) - 1) + raise IndexError(msg) + + return scandata(self, scan_index) + + def scanno(self): + """Return the number of scans in the SpecFile + + This is an alias for :meth:`__len__`, for compatibility with the old + specfile wrapper API. + """ + return len(self) + + def allmotors(self, scan_index=0): + """ + This is an alias for :meth:`motor_names`, for compatibility with + the old specfile wrapper API. + """ + return self.motor_names(scan_index) + + def epoch(self): + """:return: Epoch, from last word on file header line *#E* + :rtype: int + :raise: ValueError if *#E* line not found in header or last + word on *#E* cannot be converted to type *int*""" + fh = self.file_header() + for line in fh: + if line.startswith("#E "): + return int(line.split()[-1]) + raise ValueError("No #E header found in specfile") + + def title(self): + """:return: Title, from second field on *#C* header line (field are + strings separated by two spaces) + :rtype: str + :raise: ValueError if *#C* line not found in header or line is empty""" + fh = self.file_header() + for line in fh: + if line.startswith("#C "): + line1 = line.lstrip("#C ") + return line1.split(" ")[0] + raise ValueError("No #C header found in specfile") + + # # these functions exist in the old API but don't seem to be + # # used, and are not easy to implement + # def show(self): + # raise NotImplementedError + # + # def user(self): + # raise NotImplementedError + # + # def update(self): + # raise NotImplementedError 


# PEP8 violation in class name is to respect old API
class scandata(Scan):  # noqa
    """
    This class is a subclass of :class:`silx.io.specfile.Scan`.

    It redefines following methods/attributes:

    - :meth:`data` becomes a method returning an array, instead of just
      an array
    - :meth:`mca`: becomes a method returning an array, instead of
      a :class:`silx.io.specfile.MCA` object
    - :meth:`header`: becomes a method returning a list of **scan**
      header lines (or a list of a single header line, if a key is
      specified), instead of just a list of all header lines

    Following methods are added:

    - :meth:`allmotors`
    - :meth:`allmotorpos`
    - :meth:`alllabels`
    - :meth:`cols`
    - :meth:`lines`
    - :meth:`command`
    - :meth:`date`
    - :meth:`datacol`
    - :meth:`dataline`
    - :meth:`fileheader`
    - :meth:`nbmca`
    """
    def __init__(self, specfile, scan_index):
        Scan.__init__(self, specfile, scan_index)

    def allmotors(self):
        """Return a list of all motor names (identical to
        :attr:`motor_names`).
        """
        return self.motor_names

    def allmotorpos(self):
        """Return a list of all motor positions (identical to
        :attr:`motor_positions`).
        """
        return self.motor_positions

    def alllabels(self):
        """
        Return a list of all labels (:attr:`labels`).
        """
        return self.labels

    def cols(self):
        """Return the number of data columns (number of detectors)"""
        # NOTE(review): data() documents axis 0 as columns (and datacol()
        # indexes axis 0), yet cols() reads shape[1] and lines() shape[0].
        # This looks inverted -- confirm the layout of
        # silx.io.specfile.Scan.data before relying on these two methods.
        return super(scandata, self).cols if False else super(scandata, self).data.shape[1]

    def command(self):
        """Return the command called for this scan (``#S`` header line)"""
        return self._specfile.command(self._index)

    def data(self):
        """Return the data in this scan as a 2D numpy array.

        The first index corresponds to the columns/detectors in the original
        file, and the second index is the row index from the original file.
        Indices are 0-based.

        For instance, this is how you access the 18th data sample for the 3rd
        detector (assuming ``sc`` is your scan object):

        >>> scdata = sc.data()
        >>> data_sample = scdata[2, 17]"""
        return super(scandata, self).data

    def datacol(self, col):
        """Return a data column (all data for one detector)

        :param col: column number (1-based index)"""
        return super(scandata, self).data[col - 1, :]

    def dataline(self, line):
        """Return a data line (one sample for all detectors)

        :param line: line number (1-based index)"""
        return super(scandata, self).data[:, line - 1]

    def date(self):
        """Return the date from the scan header line ``#D``"""
        return self._specfile.date(self._index)

    def fileheader(self, key=''):  # noqa
        """Return a list of file header lines"""
        # key is there for compatibility
        return self.file_header

    def header(self, key=""):
        """Return a list of scan header lines if no key is specified.
        If a valid key is specified, return a list of a single header line.

        :param key: Header key (e.g. ``S, N, L, @CALIB``…)
            If ``key`` is an empty string, return complete list of scan header
            lines.
            If ``key`` does not match any header line, return empty list.
        :return: List of scan header lines
        :rtype: List[str]
        """
        if key.strip() == "":
            return self.scan_header
        if self.record_exists_in_hdr(key):
            prefix = "#" + key + " "
            # there is no leading @ in self.mca_header_dict keys
            key_mca_dict = key.lstrip("@") if key.startswith("@") else None
            if key_mca_dict in self.mca_header_dict:
                return [prefix + self.mca_header_dict[key_mca_dict]]
            elif key in self.scan_header_dict:
                return [prefix + self.scan_header_dict[key]]
            elif key in self.file_header_dict:
                return [prefix + self.file_header_dict[key]]
        elif self.record_exists_in_hdr("@" + key):
            # in case key is a mca header key without the @
            if key in self.mca_header_dict:
                prefix = "#@" + key + " "
                return [prefix + self.mca_header_dict[key]]
        return []

    def lines(self):
        """Return the number of data lines (number of data points per
        detector)"""
        # NOTE(review): see cols() -- axis orientation to be confirmed.
        return super(scandata, self).data.shape[0]

    def mca(self, number):
        """Return one MCA spectrum

        :param number: MCA number (1-based index)
        :rtype: 1D numpy array"""
        # in the base class, mca is an object that can be indexed (but 0-based)
        # NOTE(review): number=0 silently wraps to the last spectrum via
        # negative indexing; no bounds check is done (old-API behavior kept).
        return super(scandata, self).mca[number - 1]

    def nbmca(self):
        """Return number of MCAs in this scan"""
        return len(super(scandata, self).mca)
diff --git a/src/silx/io/spech5.py b/src/silx/io/spech5.py
new file mode 100644
index 0000000..df2021c
--- /dev/null
+++ b/src/silx/io/spech5.py
@@ -0,0 +1,907 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2016-2021 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""This module provides a h5py-like API to access SpecFile data. + +API description ++++++++++++++++ + +Specfile data structure exposed by this API: + +:: + + / + 1.1/ + title = "…" + start_time = "…" + instrument/ + specfile/ + file_header = "…" + scan_header = "…" + positioners/ + motor_name = value + … + mca_0/ + data = … + calibration = … + channels = … + preset_time = … + elapsed_time = … + live_time = … + + mca_1/ + … + … + measurement/ + colname0 = … + colname1 = … + … + mca_0/ + data -> /1.1/instrument/mca_0/data + info -> /1.1/instrument/mca_0/ + … + sample/ + ub_matrix = … + unit_cell = … + unit_cell_abc = … + unit_cell_alphabetagamma = … + 2.1/ + … + +``file_header`` and ``scan_header`` are the raw headers as they +appear in the original file, as a string of lines separated by newline (``\\n``) characters. + +The title is the content of the ``#S`` scan header line without the leading +``#S`` and without the scan number (e.g ``"ascan ss1vo -4.55687 -0.556875 40 0.2"``). + +The start time is converted to ISO8601 format (``"2016-02-23T22:49:05Z"``), +if the original date format is standard. + +Numeric datasets are stored in *float32* format, except for scalar integers +which are stored as *int64*. 
+ +Motor positions (e.g. ``/1.1/instrument/positioners/motor_name``) can be +1D numpy arrays if they are measured as scan data, or else scalars as defined +on ``#P`` scan header lines. A simple test is done to check if the motor name +is also a data column header defined in the ``#L`` scan header line. + +Scan data (e.g. ``/1.1/measurement/colname0``) is accessed by column, +the dataset name ``colname0`` being the column label as defined in the ``#L`` +scan header line. + +If a ``/`` character is present in a column label or in a motor name in the +original SPEC file, it will be substituted with a ``%`` character in the +corresponding dataset name. + +MCA data is exposed as a 2D numpy array containing all spectra for a given +analyser. The number of analysers is calculated as the number of MCA spectra +per scan data line. Demultiplexing is then performed to assign the correct +spectra to a given analyser. + +MCA calibration is an array of 3 scalars, from the ``#@CALIB`` header line. +It is identical for all MCA analysers, as there can be only one +``#@CALIB`` line per scan. + +MCA channels is an array containing all channel numbers. This information is +computed from the ``#@CHANN`` scan header line (if present), or computed from +the shape of the first spectrum in a scan (``[0, … len(first_spectrum] - 1]``). 
+ +Accessing data +++++++++++++++ + +Data and groups are accessed in :mod:`h5py` fashion:: + + from silx.io.spech5 import SpecH5 + + # Open a SpecFile + sfh5 = SpecH5("test.dat") + + # using SpecH5 as a regular group to access scans + scan1group = sfh5["1.1"] + instrument_group = scan1group["instrument"] + + # alternative: full path access + measurement_group = sfh5["/1.1/measurement"] + + # accessing a scan data column by name as a 1D numpy array + data_array = measurement_group["Pslit HGap"] + + # accessing all mca-spectra for one MCA device + mca_0_spectra = measurement_group["mca_0/data"] + +:class:`SpecH5` files and groups provide a :meth:`keys` method:: + + >>> sfh5.keys() + ['96.1', '97.1', '98.1'] + >>> sfh5['96.1'].keys() + ['title', 'start_time', 'instrument', 'measurement'] + +They can also be treated as iterators: + +.. code-block:: python + + from silx.io import is_dataset + + for scan_group in SpecH5("test.dat"): + dataset_names = [item.name in scan_group["measurement"] if + is_dataset(item)] + print("Found data columns in scan " + scan_group.name) + print(", ".join(dataset_names)) + +You can test for existence of data or groups:: + + >>> "/1.1/measurement/Pslit HGap" in sfh5 + True + >>> "positioners" in sfh5["/2.1/instrument"] + True + >>> "spam" in sfh5["1.1"] + False + +.. note:: + + Text used to be stored with a dtype ``numpy.string_`` in silx versions + prior to *0.7.0*. The type ``numpy.string_`` is a byte-string format. + The consequence of this is that you had to decode strings before using + them in **Python 3**:: + + >>> from silx.io.spech5 import SpecH5 + >>> sfh5 = SpecH5("31oct98.dat") + >>> sfh5["/68.1/title"] + b'68 ascan tx3 -28.5 -24.5 20 0.5' + >>> sfh5["/68.1/title"].decode() + '68 ascan tx3 -28.5 -24.5 20 0.5' + + From silx version *0.7.0* onwards, text is now stored as unicode. This + corresponds to the default text type in python 3, and to the *unicode* + type in Python 2. 
+ + To be on the safe side, you can test for the presence of a *decode* + attribute, to ensure that you always work with unicode text:: + + >>> title = sfh5["/68.1/title"] + >>> if hasattr(title, "decode"): + ... title = title.decode() + +""" + +import datetime +import logging +import re +import io + +import h5py +import numpy + +from silx import version as silx_version +from .specfile import SpecFile, SfErrColNotFound +from . import commonh5 + +__authors__ = ["P. Knobel", "D. Naudet"] +__license__ = "MIT" +__date__ = "17/07/2018" + +logger1 = logging.getLogger(__name__) + + +text_dtype = h5py.special_dtype(vlen=str) + + +def to_h5py_utf8(str_list): + """Convert a string or a list of strings to a numpy array of + unicode strings that can be written to HDF5 as utf-8. + + This ensures that the type will be consistent between python 2 and + python 3, if attributes or datasets are saved to an HDF5 file. + """ + return numpy.array(str_list, dtype=text_dtype) + + +def _get_number_of_mca_analysers(scan): + """ + :param SpecFile sf: :class:`SpecFile` instance + """ + number_of_mca_spectra = len(scan.mca) + # Scan.data is transposed + number_of_data_lines = scan.data.shape[1] + + if not number_of_data_lines == 0: + # Number of MCA spectra must be a multiple of number of data lines + assert number_of_mca_spectra % number_of_data_lines == 0 + return number_of_mca_spectra // number_of_data_lines + elif number_of_mca_spectra: + # Case of a scan without data lines, only MCA. + # Our only option is to assume that the number of analysers + # is the number of #@CHANN lines + return len(scan.mca.channels) + else: + return 0 + + +def _motor_in_scan(sf, scan_key, motor_name): + """ + :param sf: :class:`SpecFile` instance + :param scan_key: Scan identification key (e.g. 
``1.1``) + :param motor_name: Name of motor as defined in file header lines + :return: ``True`` if motor exists in scan, else ``False`` + :raise: ``KeyError`` if scan_key not found in SpecFile + """ + if scan_key not in sf: + raise KeyError("Scan key %s " % scan_key + + "does not exist in SpecFile %s" % sf.filename) + ret = motor_name in sf[scan_key].motor_names + if not ret and "%" in motor_name: + motor_name = motor_name.replace("%", "/") + ret = motor_name in sf[scan_key].motor_names + return ret + + +def _column_label_in_scan(sf, scan_key, column_label): + """ + :param sf: :class:`SpecFile` instance + :param scan_key: Scan identification key (e.g. ``1.1``) + :param column_label: Column label as defined in scan header + :return: ``True`` if data column label exists in scan, else ``False`` + :raise: ``KeyError`` if scan_key not found in SpecFile + """ + if scan_key not in sf: + raise KeyError("Scan key %s " % scan_key + + "does not exist in SpecFile %s" % sf.filename) + ret = column_label in sf[scan_key].labels + if not ret and "%" in column_label: + column_label = column_label.replace("%", "/") + ret = column_label in sf[scan_key].labels + return ret + + +def _parse_UB_matrix(header_line): + """Parse G3 header line and return UB matrix + + :param str header_line: G3 header line + :return: UB matrix + :raises ValueError: For malformed UB matrix header line + """ + values = list(map(float, header_line.split())) # Can raise ValueError + if len(values) < 9: + raise ValueError("Not enough values in UB matrix") + return numpy.array(values).reshape((1, 3, 3)) + + +def _ub_matrix_in_scan(scan): + """Return True if scan header has a G3 line and all values are not 0. 
+ + :param scan: specfile.Scan instance + :return: True or False + """ + header_line = scan.scan_header_dict.get("G3", None) + if header_line is None: + return False + try: + ub_matrix = _parse_UB_matrix(header_line) + except ValueError: + logger1.warning("Malformed G3 header line") + return False + return numpy.any(ub_matrix) + + +def _parse_unit_cell(header_line): + """Parse G1 header line and return unit cell + + :param str header_line: G1 header line + :return: unit cell + :raises ValueError: For malformed unit cell header line + """ + values = list(map(float, header_line.split()[0:6])) # can raise ValueError + if len(values) < 6: + raise ValueError("Not enough values in unit cell") + return numpy.array(values).reshape((1, 6)) + + +def _unit_cell_in_scan(scan): + """Return True if scan header has a G1 line and all values are not 0. + + :param scan: specfile.Scan instance + :return: True or False + """ + header_line = scan.scan_header_dict.get("G1", None) + if header_line is None: + return False + try: + unit_cell = _parse_unit_cell(header_line) + except ValueError: + logger1.warning("Malformed G1 header line") + return False + return numpy.any(unit_cell) + + +def _parse_ctime(ctime_lines, analyser_index=0): + """ + :param ctime_lines: e.g ``@CTIME %f %f %f``, first word ``@CTIME`` optional + When multiple CTIME lines are present in a scan header, this argument + is a concatenation of them separated by a ``\\n`` character. + :param analyser_index: MCA device/analyser index, when multiple devices + are in a scan. 
+ :return: (preset_time, live_time, elapsed_time) + """ + ctime_lines = ctime_lines.lstrip("@CTIME ") + ctimes_lines_list = ctime_lines.split("\n") + if len(ctimes_lines_list) == 1: + # single @CTIME line for all devices + ctime_line = ctimes_lines_list[0] + else: + ctime_line = ctimes_lines_list[analyser_index] + if not len(ctime_line.split()) == 3: + raise ValueError("Incorrect format for @CTIME header line " + + '(expected "@CTIME %f %f %f").') + return list(map(float, ctime_line.split())) + + +def spec_date_to_iso8601(date, zone=None): + """Convert SpecFile date to Iso8601. + + :param date: Date (see supported formats below) + :type date: str + :param zone: Time zone as it appears in a ISO8601 date + + Supported formats: + + * ``DDD MMM dd hh:mm:ss YYYY`` + * ``DDD YYYY/MM/dd hh:mm:ss YYYY`` + + where `DDD` is the abbreviated weekday, `MMM` is the month abbreviated + name, `MM` is the month number (zero padded), `dd` is the weekday number + (zero padded) `YYYY` is the year, `hh` the hour (zero padded), `mm` the + minute (zero padded) and `ss` the second (zero padded). + All names are expected to be in english. + + Examples:: + + >>> spec_date_to_iso8601("Thu Feb 11 09:54:35 2016") + '2016-02-11T09:54:35' + + >>> spec_date_to_iso8601("Sat 2015/03/14 03:53:50") + '2015-03-14T03:53:50' + """ + months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', + 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + days_rx = '(?P<day>' + '|'.join(days) + ')' + months_rx = '(?P<month>' + '|'.join(months) + ')' + year_rx = r'(?P<year>\d{4})' + day_nb_rx = r'(?P<day_nb>[0-3 ]\d)' + month_nb_rx = r'(?P<month_nb>[0-1]\d)' + hh_rx = r'(?P<hh>[0-2]\d)' + mm_rx = r'(?P<mm>[0-5]\d)' + ss_rx = r'(?P<ss>[0-5]\d)' + tz_rx = r'(?P<tz>[+-]\d\d:\d\d){0,1}' + + # date formats must have either month_nb (1..12) or month (Jan, Feb, ...) 
+ re_tpls = ['{days} {months} {day_nb} {hh}:{mm}:{ss}{tz} {year}', + '{days} {year}/{month_nb}/{day_nb} {hh}:{mm}:{ss}{tz}'] + + grp_d = None + + for rx in re_tpls: + full_rx = rx.format(days=days_rx, + months=months_rx, + year=year_rx, + day_nb=day_nb_rx, + month_nb=month_nb_rx, + hh=hh_rx, + mm=mm_rx, + ss=ss_rx, + tz=tz_rx) + m = re.match(full_rx, date) + + if m: + grp_d = m.groupdict() + break + + if not grp_d: + raise ValueError('Date format not recognized : {0}'.format(date)) + + year = grp_d['year'] + + month = grp_d.get('month_nb') + + if not month: + month = '{0:02d}'.format(months.index(grp_d.get('month')) + 1) + + day = grp_d['day_nb'] + + tz = grp_d['tz'] + if not tz: + tz = zone + + time = '{0}:{1}:{2}'.format(grp_d['hh'], + grp_d['mm'], + grp_d['ss']) + + full_date = '{0}-{1}-{2}T{3}{4}'.format(year, + month, + day, + time, + tz if tz else '') + return full_date + + +def _demultiplex_mca(scan, analyser_index): + """Return MCA data for a single analyser. + + Each MCA spectrum is a 1D array. For each analyser, there is one + spectrum recorded per scan data line. When there are more than a single + MCA analyser in a scan, the data will be multiplexed. For instance if + there are 3 analysers, the consecutive spectra for the first analyser must + be accessed as ``mca[0], mca[3], mca[6]…``. 
+ + :param scan: :class:`Scan` instance containing the MCA data + :param analyser_index: 0-based index referencing the analyser + :type analyser_index: int + :return: 2D numpy array containing all spectra for one analyser + """ + number_of_analysers = _get_number_of_mca_analysers(scan) + number_of_spectra = len(scan.mca) + number_of_spectra_per_analyser = number_of_spectra // number_of_analysers + len_spectrum = len(scan.mca[analyser_index]) + + mca_array = numpy.empty((number_of_spectra_per_analyser, len_spectrum)) + + for i in range(number_of_spectra_per_analyser): + mca_array[i, :] = scan.mca[analyser_index + i * number_of_analysers] + + return mca_array + + +# Node classes +class SpecH5Dataset(object): + """This convenience class is to be inherited by all datasets, for + compatibility purpose with code that tests for + ``isinstance(obj, SpecH5Dataset)``. + + This legacy behavior is deprecated. The correct way to test + if an object is a dataset is to use :meth:`silx.io.utils.is_dataset`. + + Datasets must also inherit :class:`SpecH5NodeDataset` or + :class:`SpecH5LazyNodeDataset` which actually implement all the + API.""" + pass + + +class SpecH5NodeDataset(commonh5.Dataset, SpecH5Dataset): + """This class inherits :class:`commonh5.Dataset`, to which it adds + little extra functionality. The main additional functionality is the + proxy behavior that allows to mimic the numpy array stored in this + class. 
+ """ + def __init__(self, name, data, parent=None, attrs=None): + # get proper value types, to inherit from numpy + # attributes (dtype, shape, size) + if isinstance(data, str): + # use unicode (utf-8 when saved to HDF5 output) + value = to_h5py_utf8(data) + elif isinstance(data, float): + # use 32 bits for float scalars + value = numpy.float32(data) + elif isinstance(data, int): + value = numpy.int_(data) + else: + # Enforce numpy array + array = numpy.array(data) + data_kind = array.dtype.kind + + if data_kind in ["S", "U"]: + value = numpy.asarray(array, + dtype=text_dtype) + elif data_kind in ["f"]: + value = numpy.asarray(array, dtype=numpy.float32) + else: + value = array + commonh5.Dataset.__init__(self, name, value, parent, attrs) + + def __getattr__(self, item): + """Proxy to underlying numpy array methods. + """ + if hasattr(self[()], item): + return getattr(self[()], item) + + raise AttributeError("SpecH5Dataset has no attribute %s" % item) + + +class SpecH5LazyNodeDataset(commonh5.LazyLoadableDataset, SpecH5Dataset): + """This class inherits :class:`commonh5.LazyLoadableDataset`, + to which it adds a proxy behavior that allows to mimic the numpy + array stored in this class. + + The class has to be inherited and the :meth:`_create_data` method has to be + implemented to return the numpy data exposed by the dataset. This factory + method is only called once, when the data is needed. + """ + def __getattr__(self, item): + """Proxy to underlying numpy array methods. + """ + if hasattr(self[()], item): + return getattr(self[()], item) + + raise AttributeError("SpecH5Dataset has no attribute %s" % item) + + def _create_data(self): + """ + Factory to create the data exposed by the dataset when it is needed. + + It has to be implemented for the class to work. 
+ + :rtype: numpy.ndarray + """ + raise NotImplementedError() + + +class SpecH5Group(object): + """This convenience class is to be inherited by all groups, for + compatibility purposes with code that tests for + ``isinstance(obj, SpecH5Group)``. + + This legacy behavior is deprecated. The correct way to test + if an object is a group is to use :meth:`silx.io.utils.is_group`. + + Groups must also inherit :class:`silx.io.commonh5.Group`, which + actually implements all the methods and attributes.""" + pass + + +class SpecH5(commonh5.File, SpecH5Group): + """This class opens a SPEC file and exposes it as a *h5py.File*. + + It inherits :class:`silx.io.commonh5.Group` (via :class:`commonh5.File`), + which implements most of its API. + """ + + def __init__(self, filename): + """ + :param filename: Path to SpecFile in filesystem + :type filename: str + """ + if isinstance(filename, io.IOBase): + # see https://github.com/silx-kit/silx/issues/858 + filename = filename.name + + self._sf = SpecFile(filename) + + attrs = {"NX_class": to_h5py_utf8("NXroot"), + "file_time": to_h5py_utf8( + datetime.datetime.now().isoformat()), + "file_name": to_h5py_utf8(filename), + "creator": to_h5py_utf8("silx spech5 %s" % silx_version)} + commonh5.File.__init__(self, filename, attrs=attrs) + + for scan_key in self._sf.keys(): + scan = self._sf[scan_key] + scan_group = ScanGroup(scan_key, parent=self, scan=scan) + self.add_node(scan_group) + + def close(self): + self._sf.close() + self._sf = None + + +class ScanGroup(commonh5.Group, SpecH5Group): + def __init__(self, scan_key, parent, scan): + """ + + :param parent: parent Group + :param str scan_key: Scan key (e.g. 
"1.1") + :param scan: specfile.Scan object + """ + commonh5.Group.__init__(self, scan_key, parent=parent, + attrs={"NX_class": to_h5py_utf8("NXentry")}) + + # take title in #S after stripping away scan number and spaces + s_hdr_line = scan.scan_header_dict["S"] + title = s_hdr_line.lstrip("0123456789").lstrip() + self.add_node(SpecH5NodeDataset(name="title", + data=to_h5py_utf8(title), + parent=self)) + + if "D" in scan.scan_header_dict: + try: + start_time_str = spec_date_to_iso8601(scan.scan_header_dict["D"]) + except (IndexError, ValueError): + logger1.warning("Could not parse date format in scan %s header." + + " Using original date not converted to ISO-8601", + scan_key) + start_time_str = scan.scan_header_dict["D"] + elif "D" in scan.file_header_dict: + logger1.warning("No #D line in scan %s header. " + + "Using file header for start_time.", + scan_key) + try: + start_time_str = spec_date_to_iso8601(scan.file_header_dict["D"]) + except (IndexError, ValueError): + logger1.warning("Could not parse date format in scan %s header. " + + "Using original date not converted to ISO-8601", + scan_key) + start_time_str = scan.file_header_dict["D"] + else: + logger1.warning("No #D line in %s header. 
Setting date to empty string.", + scan_key) + start_time_str = "" + self.add_node(SpecH5NodeDataset(name="start_time", + data=to_h5py_utf8(start_time_str), + parent=self)) + + self.add_node(InstrumentGroup(parent=self, scan=scan)) + self.add_node(MeasurementGroup(parent=self, scan=scan)) + if _unit_cell_in_scan(scan) or _ub_matrix_in_scan(scan): + self.add_node(SampleGroup(parent=self, scan=scan)) + + +class InstrumentGroup(commonh5.Group, SpecH5Group): + def __init__(self, parent, scan): + """ + + :param parent: parent Group + :param scan: specfile.Scan object + """ + commonh5.Group.__init__(self, name="instrument", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXinstrument")}) + + self.add_node(InstrumentSpecfileGroup(parent=self, scan=scan)) + self.add_node(PositionersGroup(parent=self, scan=scan)) + + num_analysers = _get_number_of_mca_analysers(scan) + for anal_idx in range(num_analysers): + self.add_node(InstrumentMcaGroup(parent=self, + analyser_index=anal_idx, + scan=scan)) + + +class InstrumentSpecfileGroup(commonh5.Group, SpecH5Group): + def __init__(self, parent, scan): + commonh5.Group.__init__(self, name="specfile", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXcollection")}) + self.add_node(SpecH5NodeDataset( + name="file_header", + data=to_h5py_utf8(scan.file_header), + parent=self, + attrs={})) + self.add_node(SpecH5NodeDataset( + name="scan_header", + data=to_h5py_utf8(scan.scan_header), + parent=self, + attrs={})) + + +class PositionersGroup(commonh5.Group, SpecH5Group): + def __init__(self, parent, scan): + commonh5.Group.__init__(self, name="positioners", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXcollection")}) + + dataset_info = [] # Store list of positioner's (name, value) + is_error = False # True if error encountered + + for motor_name in scan.motor_names: + safe_motor_name = motor_name.replace("/", "%") + if motor_name in scan.labels and scan.data.shape[0] > 0: + # return a data column if one has the same label as the 
motor + motor_value = scan.data_column_by_name(motor_name) + else: + # Take value from #P scan header. + # (may return float("inf") if #P line is missing from scan hdr) + try: + motor_value = scan.motor_position_by_name(motor_name) + except SfErrColNotFound: + is_error = True + motor_value = float('inf') + dataset_info.append((safe_motor_name, motor_value)) + + if is_error: # Filter-out scalar values + logger1.warning("Mismatching number of elements in #P and #O: Ignoring") + dataset_info = [ + (name, value) for name, value in dataset_info + if not isinstance(value, float)] + + for name, value in dataset_info: + self.add_node(SpecH5NodeDataset( + name=name, + data=value, + parent=self)) + + +class InstrumentMcaGroup(commonh5.Group, SpecH5Group): + def __init__(self, parent, analyser_index, scan): + name = "mca_%d" % analyser_index + commonh5.Group.__init__(self, name=name, parent=parent, + attrs={"NX_class": to_h5py_utf8("NXdetector")}) + + mcaDataDataset = McaDataDataset(parent=self, + analyser_index=analyser_index, + scan=scan) + self.add_node(mcaDataDataset) + spectrum_length = mcaDataDataset.shape[-1] + mcaDataDataset = None + + if len(scan.mca.channels) == 1: + # single @CALIB line applying to multiple devices + calibration_dataset = scan.mca.calibration[0] + channels_dataset = scan.mca.channels[0] + else: + calibration_dataset = scan.mca.calibration[analyser_index] + channels_dataset = scan.mca.channels[analyser_index] + + channels_length = len(channels_dataset) + if (channels_length > 1) and (spectrum_length > 0): + logger1.info("Spectrum and channels length mismatch") + # this should always be the case + if channels_length > spectrum_length: + channels_dataset = channels_dataset[:spectrum_length] + elif channels_length < spectrum_length: + # only trust first channel and increment + channel0 = channels_dataset[0] + increment = channels_dataset[1] - channels_dataset[0] + channels_dataset = numpy.linspace(channel0, + channel0 + increment * spectrum_length, + 
spectrum_length, endpoint=False) + + self.add_node(SpecH5NodeDataset(name="calibration", + data=calibration_dataset, + parent=self)) + self.add_node(SpecH5NodeDataset(name="channels", + data=channels_dataset, + parent=self)) + + if "CTIME" in scan.mca_header_dict: + ctime_line = scan.mca_header_dict['CTIME'] + preset_time, live_time, elapsed_time = _parse_ctime(ctime_line, analyser_index) + self.add_node(SpecH5NodeDataset(name="preset_time", + data=preset_time, + parent=self)) + self.add_node(SpecH5NodeDataset(name="live_time", + data=live_time, + parent=self)) + self.add_node(SpecH5NodeDataset(name="elapsed_time", + data=elapsed_time, + parent=self)) + + +class McaDataDataset(SpecH5LazyNodeDataset): + """Lazy loadable dataset for MCA data""" + def __init__(self, parent, analyser_index, scan): + commonh5.LazyLoadableDataset.__init__( + self, name="data", parent=parent, + attrs={"interpretation": to_h5py_utf8("spectrum"),}) + self._scan = scan + self._analyser_index = analyser_index + self._shape = None + self._num_analysers = _get_number_of_mca_analysers(self._scan) + + def _create_data(self): + return _demultiplex_mca(self._scan, self._analyser_index) + + @property + def shape(self): + if self._shape is None: + num_spectra_in_file = len(self._scan.mca) + num_spectra_per_analyser = num_spectra_in_file // self._num_analysers + len_spectrum = len(self._scan.mca[self._analyser_index]) + self._shape = num_spectra_per_analyser, len_spectrum + return self._shape + + @property + def size(self): + return numpy.prod(self.shape, dtype=numpy.intp) + + @property + def dtype(self): + # we initialize the data with numpy.empty() without specifying a dtype + # in _demultiplex_mca() + return numpy.empty((1, )).dtype + + def __len__(self): + return self.shape[0] + + def __getitem__(self, item): + # optimization for fetching a single spectrum if data not already loaded + if not self._is_initialized: + if isinstance(item, int): + if item < 0: + # negative indexing + item += len(self) 
+ return self._scan.mca[self._analyser_index + + item * self._num_analysers] + # accessing a slice or element of a single spectrum [i, j:k] + try: + spectrum_idx, channel_idx_or_slice = item + assert isinstance(spectrum_idx, int) + except (ValueError, TypeError, AssertionError): + pass + else: + if spectrum_idx < 0: + item += len(self) + idx = self._analyser_index + spectrum_idx * self._num_analysers + return self._scan.mca[idx][channel_idx_or_slice] + + return super(McaDataDataset, self).__getitem__(item) + + +class MeasurementGroup(commonh5.Group, SpecH5Group): + def __init__(self, parent, scan): + """ + + :param parent: parent Group + :param scan: specfile.Scan object + """ + commonh5.Group.__init__(self, name="measurement", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXcollection"),}) + for label in scan.labels: + safe_label = label.replace("/", "%") + self.add_node(SpecH5NodeDataset(name=safe_label, + data=scan.data_column_by_name(label), + parent=self)) + + num_analysers = _get_number_of_mca_analysers(scan) + for anal_idx in range(num_analysers): + self.add_node(MeasurementMcaGroup(parent=self, analyser_index=anal_idx)) + + +class MeasurementMcaGroup(commonh5.Group, SpecH5Group): + def __init__(self, parent, analyser_index): + basename = "mca_%d" % analyser_index + commonh5.Group.__init__(self, name=basename, parent=parent, + attrs={}) + + target_name = self.name.replace("measurement", "instrument") + self.add_node(commonh5.SoftLink(name="data", + path=target_name + "/data", + parent=self)) + self.add_node(commonh5.SoftLink(name="info", + path=target_name, + parent=self)) + + +class SampleGroup(commonh5.Group, SpecH5Group): + def __init__(self, parent, scan): + """ + + :param parent: parent Group + :param scan: specfile.Scan object + """ + commonh5.Group.__init__(self, name="sample", parent=parent, + attrs={"NX_class": to_h5py_utf8("NXsample"),}) + + if _unit_cell_in_scan(scan): + self.add_node(SpecH5NodeDataset(name="unit_cell", + 
data=_parse_unit_cell(scan.scan_header_dict["G1"]), + parent=self, + attrs={"interpretation": to_h5py_utf8("scalar")})) + self.add_node(SpecH5NodeDataset(name="unit_cell_abc", + data=_parse_unit_cell(scan.scan_header_dict["G1"])[0, 0:3], + parent=self, + attrs={"interpretation": to_h5py_utf8("scalar")})) + self.add_node(SpecH5NodeDataset(name="unit_cell_alphabetagamma", + data=_parse_unit_cell(scan.scan_header_dict["G1"])[0, 3:6], + parent=self, + attrs={"interpretation": to_h5py_utf8("scalar")})) + if _ub_matrix_in_scan(scan): + self.add_node(SpecH5NodeDataset(name="ub_matrix", + data=_parse_UB_matrix(scan.scan_header_dict["G3"]), + parent=self, + attrs={"interpretation": to_h5py_utf8("scalar")})) diff --git a/src/silx/io/spectoh5.py b/src/silx/io/spectoh5.py new file mode 100644 index 0000000..fb3b739 --- /dev/null +++ b/src/silx/io/spectoh5.py @@ -0,0 +1,81 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Deprecated module. Use :mod:`convert` instead.""" + +from .convert import Hdf5Writer +from .convert import write_to_h5 +from .convert import convert as other_convert + +from silx.utils import deprecation + +deprecation.deprecated_warning(type_="Module", + name="silx.io.spectoh5", + since_version="0.6", + replacement="silx.io.convert") + + +class SpecToHdf5Writer(Hdf5Writer): + def __init__(self, h5path='/', overwrite_data=False, + link_type="hard", create_dataset_args=None): + deprecation.deprecated_warning( + type_="Class", + name="SpecToHdf5Writer", + since_version="0.6", + replacement="silx.io.convert.Hdf5Writer") + Hdf5Writer.__init__(self, h5path, overwrite_data, + link_type, create_dataset_args) + + # methods whose signatures changed + def write(self, sfh5, h5f): + Hdf5Writer.write(self, infile=sfh5, h5f=h5f) + + def append_spec_member_to_h5(self, spec_h5_name, obj): + Hdf5Writer.append_member_to_h5(self, + h5like_name=spec_h5_name, + obj=obj) + + +@deprecation.deprecated(replacement="silx.io.convert.write_to_h5", + since_version="0.6") +def write_spec_to_h5(specfile, h5file, h5path='/', + mode="a", overwrite_data=False, + link_type="hard", create_dataset_args=None): + + write_to_h5(infile=specfile, + h5file=h5file, + h5path=h5path, + mode=mode, + overwrite_data=overwrite_data, + link_type=link_type, + create_dataset_args=create_dataset_args) + + +@deprecation.deprecated(replacement="silx.io.convert.convert", + since_version="0.6") +def convert(specfile, h5file, mode="w-", + create_dataset_args=None): + other_convert(infile=specfile, + h5file=h5file, + mode=mode, + create_dataset_args=create_dataset_args) 
diff --git a/src/silx/io/test/__init__.py b/src/silx/io/test/__init__.py new file mode 100644 index 0000000..244d090 --- /dev/null +++ b/src/silx/io/test/__init__.py @@ -0,0 +1,23 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# ############################################################################*/ diff --git a/src/silx/io/test/test_commonh5.py b/src/silx/io/test/test_commonh5.py new file mode 100644 index 0000000..27f6e8c --- /dev/null +++ b/src/silx/io/test/test_commonh5.py @@ -0,0 +1,285 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for commonh5 wrapper""" + +__authors__ = ["V. Valls"] +__license__ = "MIT" +__date__ = "21/09/2017" + +import logging +import numpy +import unittest +import tempfile +import shutil + +_logger = logging.getLogger(__name__) + +import silx.io +import silx.io.utils +import h5py + +try: + from .. 
import commonh5 +except ImportError: + commonh5 = None + + +class _TestCommonFeatures(unittest.TestCase): + """Test common features supported by h5py and our implementation.""" + __test__ = False # ignore abstract class tests + + @classmethod + def createFile(cls): + return None + + @classmethod + def setUpClass(cls): + # Set to None cause create_resource can raise an excpetion + cls.h5 = None + cls.h5 = cls.create_resource() + if cls.h5 is None: + raise unittest.SkipTest("File not created") + + @classmethod + def create_resource(cls): + """Must be implemented""" + return None + + @classmethod + def tearDownClass(cls): + cls.h5 = None + + def test_file(self): + node = self.h5 + self.assertTrue(silx.io.is_file(node)) + self.assertTrue(silx.io.is_group(node)) + self.assertFalse(silx.io.is_dataset(node)) + self.assertEqual(len(node.attrs), 0) + + def test_group(self): + node = self.h5["group"] + self.assertFalse(silx.io.is_file(node)) + self.assertTrue(silx.io.is_group(node)) + self.assertFalse(silx.io.is_dataset(node)) + self.assertEqual(len(node.attrs), 0) + class_ = self.h5.get("group", getclass=True) + classlink = self.h5.get("group", getlink=True, getclass=True) + self.assertEqual(class_, h5py.Group) + self.assertEqual(classlink, h5py.HardLink) + + def test_dataset(self): + node = self.h5["group/dataset"] + self.assertFalse(silx.io.is_file(node)) + self.assertFalse(silx.io.is_group(node)) + self.assertTrue(silx.io.is_dataset(node)) + self.assertEqual(len(node.attrs), 0) + class_ = self.h5.get("group/dataset", getclass=True) + classlink = self.h5.get("group/dataset", getlink=True, getclass=True) + self.assertEqual(class_, h5py.Dataset) + self.assertEqual(classlink, h5py.HardLink) + + def test_soft_link(self): + node = self.h5["link/soft_link"] + self.assertEqual(node.name, "/link/soft_link") + class_ = self.h5.get("link/soft_link", getclass=True) + link = self.h5.get("link/soft_link", getlink=True) + classlink = self.h5.get("link/soft_link", getlink=True, 
getclass=True) + self.assertEqual(class_, h5py.Dataset) + self.assertTrue(isinstance(link, (h5py.SoftLink, commonh5.SoftLink))) + self.assertTrue(silx.io.utils.is_softlink(link)) + self.assertEqual(classlink, h5py.SoftLink) + + def test_external_link(self): + node = self.h5["link/external_link"] + self.assertEqual(node.name, "/target/dataset") + class_ = self.h5.get("link/external_link", getclass=True) + classlink = self.h5.get("link/external_link", getlink=True, getclass=True) + self.assertEqual(class_, h5py.Dataset) + self.assertEqual(classlink, h5py.ExternalLink) + + def test_external_link_to_link(self): + node = self.h5["link/external_link_to_link"] + self.assertEqual(node.name, "/target/link") + class_ = self.h5.get("link/external_link_to_link", getclass=True) + classlink = self.h5.get("link/external_link_to_link", getlink=True, getclass=True) + self.assertEqual(class_, h5py.Dataset) + self.assertEqual(classlink, h5py.ExternalLink) + + def test_create_groups(self): + c = self.h5.create_group(self.id() + "/a/b/c") + d = c.create_group("/" + self.id() + "/a/b/d") + + self.assertRaises(ValueError, self.h5.create_group, self.id() + "/a/b/d") + self.assertEqual(c.name, "/" + self.id() + "/a/b/c") + self.assertEqual(d.name, "/" + self.id() + "/a/b/d") + + def test_setitem_python_object_dataset(self): + group = self.h5.create_group(self.id()) + group["a"] = 10 + self.assertEqual(group["a"].dtype.kind, "i") + + def test_setitem_numpy_dataset(self): + group = self.h5.create_group(self.id()) + group["a"] = numpy.array([10, 20, 30]) + self.assertEqual(group["a"].dtype.kind, "i") + self.assertEqual(group["a"].shape, (3,)) + + def test_setitem_link(self): + group = self.h5.create_group(self.id()) + group["a"] = 10 + group["b"] = group["a"] + self.assertEqual(group["b"].dtype.kind, "i") + + def test_setitem_dataset_is_sub_group(self): + self.h5[self.id() + "/a"] = 10 + + +class TestCommonFeatures_h5py(_TestCommonFeatures): + """Check if h5py is compliant with what we 
expect.""" + __test__ = True # because _TestCommonFeatures is ignored + + @classmethod + def create_resource(cls): + cls.tmp_dir = tempfile.mkdtemp() + + externalh5 = h5py.File(cls.tmp_dir + "/external.h5", mode="w") + externalh5["target/dataset"] = 50 + externalh5["target/link"] = h5py.SoftLink("/target/dataset") + externalh5.close() + + h5 = h5py.File(cls.tmp_dir + "/base.h5", mode="w") + h5["group/dataset"] = 50 + h5["link/soft_link"] = h5py.SoftLink("/group/dataset") + h5["link/external_link"] = h5py.ExternalLink("external.h5", "/target/dataset") + h5["link/external_link_to_link"] = h5py.ExternalLink("external.h5", "/target/link") + + return h5 + + @classmethod + def tearDownClass(cls): + super(TestCommonFeatures_h5py, cls).tearDownClass() + if hasattr(cls, "tmp_dir") and cls.tmp_dir is not None: + shutil.rmtree(cls.tmp_dir) + + +class TestCommonFeatures_commonH5(_TestCommonFeatures): + """Check if commonh5 is compliant with h5py.""" + __test__ = True # because _TestCommonFeatures is ignored + + @classmethod + def create_resource(cls): + h5 = commonh5.File("base.h5", "w") + h5.create_group("group").create_dataset("dataset", data=numpy.int32(50)) + + link = h5.create_group("link") + link.add_node(commonh5.SoftLink("soft_link", "/group/dataset")) + + return h5 + + def test_external_link(self): + # not applicable + pass + + def test_external_link_to_link(self): + # not applicable + pass + + +class TestSpecificCommonH5(unittest.TestCase): + """Test specific features from commonh5. 
+ + Test of shared features should be done by TestCommonFeatures.""" + + def setUp(self): + if commonh5 is None: + self.skipTest("silx.io.commonh5 is needed") + + def test_node_attrs(self): + node = commonh5.Node("Foo", attrs={"a": 1}) + self.assertEqual(node.attrs["a"], 1) + node.attrs["b"] = 8 + self.assertEqual(node.attrs["b"], 8) + node.attrs["b"] = 2 + self.assertEqual(node.attrs["b"], 2) + + def test_node_readonly_attrs(self): + f = commonh5.File(name="Foo", mode="r") + node = commonh5.Node("Foo", attrs={"a": 1}) + node.attrs["b"] = 8 + f.add_node(node) + self.assertEqual(node.attrs["b"], 8) + try: + node.attrs["b"] = 1 + self.fail() + except RuntimeError: + pass + + def test_create_dataset(self): + f = commonh5.File(name="Foo", mode="w") + node = f.create_dataset("foo", data=numpy.array([1])) + self.assertIs(node.parent, f) + self.assertIs(f["foo"], node) + + def test_create_group(self): + f = commonh5.File(name="Foo", mode="w") + node = f.create_group("foo") + self.assertIs(node.parent, f) + self.assertIs(f["foo"], node) + + def test_readonly_create_dataset(self): + f = commonh5.File(name="Foo", mode="r") + try: + f.create_dataset("foo", data=numpy.array([1])) + self.fail() + except RuntimeError: + pass + + def test_readonly_create_group(self): + f = commonh5.File(name="Foo", mode="r") + try: + f.create_group("foo") + self.fail() + except RuntimeError: + pass + + def test_create_unicode_dataset(self): + f = commonh5.File(name="Foo", mode="w") + try: + f.create_dataset("foo", data=numpy.array(u"aaaa")) + self.fail() + except TypeError: + pass + + def test_setitem_dataset(self): + self.h5 = commonh5.File(name="Foo", mode="w") + group = self.h5.create_group(self.id()) + group["a"] = commonh5.Dataset(None, data=numpy.array(10)) + self.assertEqual(group["a"].dtype.kind, "i") + + def test_setitem_explicit_link(self): + self.h5 = commonh5.File(name="Foo", mode="w") + group = self.h5.create_group(self.id()) + group["a"] = 10 + group["b"] = commonh5.SoftLink(None, 
path="/" + self.id() + "/a") + self.assertEqual(group["b"].dtype.kind, "i") diff --git a/src/silx/io/test/test_dictdump.py b/src/silx/io/test/test_dictdump.py new file mode 100644 index 0000000..4cafa9b --- /dev/null +++ b/src/silx/io/test/test_dictdump.py @@ -0,0 +1,1009 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for dicttoh5 module""" + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "17/01/2018" + +from collections import OrderedDict +import numpy +import os +import tempfile +import unittest +import h5py +from copy import deepcopy + +from collections import defaultdict + +from silx.utils.testutils import LoggingValidator + +from ..configdict import ConfigDict +from .. 
def tree():
    """Tree data structure as a recursive nested dictionary

    Accessing a missing key auto-creates a new subtree (a nested
    ``defaultdict`` of the same kind), so arbitrarily deep paths can be
    assigned in one statement, e.g. ``t["a"]["b"]["c"] = 1``.  Merely
    reading a missing key also creates an empty subtree — the module-level
    fixtures below rely on that side effect.
    """
    return defaultdict(tree)
list(treedict.items()): + if isinstance(value, dict): + self._dictRoundTripNormalize(value) + + # Expand treedict[("group", "attr_name")] + # to treedict["group"]["attr_name"] + for key, value in list(treedict.items()): + if not isinstance(key, tuple): + continue + # Put the attribute inside the group + grpname, attr = key + if not grpname: + continue + group = treedict.setdefault(grpname, dict()) + if isinstance(group, dict): + del treedict[key] + group[("", attr)] = value + + def dictRoundTripNormalize(self, treedict): + treedict2 = deepcopy(treedict) + self._dictRoundTripNormalize(treedict2) + return treedict2 + + +class TestDictToH5(H5DictTestCase): + def setUp(self): + self.tempdir = tempfile.mkdtemp() + self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5") + self.h5_ext_fname = os.path.join(self.tempdir, ext_filename) + + def tearDown(self): + if os.path.exists(self.h5_fname): + os.unlink(self.h5_fname) + if os.path.exists(self.h5_ext_fname): + os.unlink(self.h5_ext_fname) + os.rmdir(self.tempdir) + + def testH5CityAttrs(self): + filters = {'shuffle': True, + 'fletcher32': True} + dicttoh5(city_attrs, self.h5_fname, h5path='/city attributes', + mode="w", create_dataset_args=filters) + + h5f = h5py.File(self.h5_fname, mode='r') + + self.assertIn("Tourcoing/area", h5f["/city attributes/Europe/France"]) + ds = h5f["/city attributes/Europe/France/Grenoble/inhabitants"] + self.assertEqual(ds[...], 160215) + + # filters only apply to datasets that are not scalars (shape != () ) + ds = h5f["/city attributes/Europe/France/Grenoble/coordinates"] + #self.assertEqual(ds.compression, "gzip") + self.assertTrue(ds.fletcher32) + self.assertTrue(ds.shuffle) + + h5f.close() + + ddict = load(self.h5_fname, fmat="hdf5") + self.assertAlmostEqual( + min(ddict["city attributes"]["Europe"]["France"]["Grenoble"]["coordinates"]), + 5.7196) + + def testH5OverwriteDeprecatedApi(self): + dd = ConfigDict({'t': True}) + + dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a') + dd = 
ConfigDict({'t': False}) + dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a', + overwrite_data=False) + + res = h5todict(self.h5_fname) + assert(res['t'] == True) + + dicttoh5(h5file=self.h5_fname, treedict=dd, mode='a', + overwrite_data=True) + + res = h5todict(self.h5_fname) + assert(res['t'] == False) + + def testAttributes(self): + """Any kind of attribute can be described""" + ddict = { + "group": {"datatset": "hmmm", ("", "group_attr"): 10}, + "dataset": "aaaaaaaaaaaaaaa", + ("", "root_attr"): 11, + ("dataset", "dataset_attr"): 12, + ("group", "group_attr2"): 13, + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttoh5(ddict, h5file) + self.assertEqual(h5file["group"].attrs['group_attr'], 10) + self.assertEqual(h5file.attrs['root_attr'], 11) + self.assertEqual(h5file["dataset"].attrs['dataset_attr'], 12) + self.assertEqual(h5file["group"].attrs['group_attr2'], 13) + + def testPathAttributes(self): + """A group is requested at a path""" + ddict = { + ("", "NX_class"): 'NXcollection', + } + with h5py.File(self.h5_fname, "w") as h5file: + # This should not warn + with LoggingValidator(dictdump_logger, warning=0): + dictdump.dicttoh5(ddict, h5file, h5path="foo/bar") + + def testKeyOrder(self): + ddict1 = { + "d": "plow", + ("d", "a"): "ox", + } + ddict2 = { + ("d", "a"): "ox", + "d": "plow", + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttoh5(ddict1, h5file, h5path="g1") + dictdump.dicttoh5(ddict2, h5file, h5path="g2") + self.assertEqual(h5file["g1/d"].attrs['a'], "ox") + self.assertEqual(h5file["g2/d"].attrs['a'], "ox") + + def testAttributeValues(self): + """Any NX data types can be used""" + ddict = { + ("", "bool"): True, + ("", "int"): 11, + ("", "float"): 1.1, + ("", "str"): "a", + ("", "boollist"): [True, False, True], + ("", "intlist"): [11, 22, 33], + ("", "floatlist"): [1.1, 2.2, 3.3], + ("", "strlist"): ["a", "bb", "ccc"], + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttoh5(ddict, h5file) + for 
k, expected in ddict.items(): + result = h5file.attrs[k[1]] + if isinstance(expected, list): + if isinstance(expected[0], str): + numpy.testing.assert_array_equal(result, expected) + else: + numpy.testing.assert_array_almost_equal(result, expected) + else: + self.assertEqual(result, expected) + + def testAttributeAlreadyExists(self): + """A duplicated attribute warns if overwriting is not enabled""" + ddict = { + "group": {"dataset": "hmmm", ("", "attr"): 10}, + ("group", "attr"): 10, + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttoh5(ddict, h5file) + self.assertEqual(h5file["group"].attrs['attr'], 10) + + def testFlatDict(self): + """Description of a tree with a single level of keys""" + ddict = { + "group/group/dataset": 10, + ("group/group/dataset", "attr"): 11, + ("group/group", "attr"): 12, + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttoh5(ddict, h5file) + self.assertEqual(h5file["group/group/dataset"][()], 10) + self.assertEqual(h5file["group/group/dataset"].attrs['attr'], 11) + self.assertEqual(h5file["group/group"].attrs['attr'], 12) + + def testLinks(self): + with h5py.File(self.h5_ext_fname, "w") as h5file: + dictdump.dicttoh5(ext_attrs, h5file) + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttoh5(link_attrs, h5file) + with h5py.File(self.h5_fname, "r") as h5file: + self.assertEqual(h5file["links/group/dataset"][()], 10) + self.assertEqual(h5file["links/group/relative_softlink"][()], 10) + self.assertEqual(h5file["links/relative_softlink"][()], 10) + self.assertEqual(h5file["links/absolute_softlink"][()], 10) + self.assertEqual(h5file["links/external_link"][()], 10) + + def testDumpNumpyArray(self): + ddict = { + 'darks': { + '0': numpy.array([[0, 0, 0], [0, 0, 0]], dtype=numpy.uint16) + } + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttoh5(ddict, h5file) + with h5py.File(self.h5_fname, "r") as h5file: + numpy.testing.assert_array_equal(h5py_read_dataset(h5file["darks"]["0"]), + 
ddict['darks']['0']) + + def testOverwrite(self): + # Tree structure that will be tested + group1 = { + ("", "attr2"): "original2", + "dset1": 0, + "dset2": [0, 1], + ("dset1", "attr1"): "original1", + ("dset1", "attr2"): "original2", + ("dset2", "attr1"): "original1", + ("dset2", "attr2"): "original2", + } + group2 = { + "subgroup1": group1.copy(), + "subgroup2": group1.copy(), + ("subgroup1", "attr1"): "original1", + ("subgroup2", "attr1"): "original1" + } + group2.update(group1) + # initial HDF5 tree + otreedict = { + ('', 'attr1'): "original1", + ('', 'attr2'): "original2", + 'group1': group1, + 'group2': group2, + ('group1', 'attr1'): "original1", + ('group2', 'attr1'): "original1" + } + wtreedict = None # dumped dictionary + etreedict = None # expected HDF5 tree after dump + + def reset_file(): + dicttoh5( + otreedict, + h5file=self.h5_fname, + mode="w", + ) + + def append_file(update_mode): + dicttoh5( + wtreedict, + h5file=self.h5_fname, + mode="a", + update_mode=update_mode + ) + + def assert_file(): + rtreedict = h5todict( + self.h5_fname, + include_attributes=True, + asarray=False + ) + netreedict = self.dictRoundTripNormalize(etreedict) + try: + self.assertRecursiveEqual(netreedict, rtreedict) + except AssertionError: + from pprint import pprint + print("\nDUMP:") + pprint(wtreedict) + print("\nEXPECTED:") + pprint(netreedict) + print("\nHDF5:") + pprint(rtreedict) + raise + + def assert_append(update_mode): + append_file(update_mode) + assert_file() + + # Test wrong arguments + with self.assertRaises(ValueError): + dicttoh5( + otreedict, + h5file=self.h5_fname, + mode="w", + update_mode="wrong-value" + ) + + # No writing + reset_file() + etreedict = deepcopy(otreedict) + assert_file() + + # Write identical dictionary + wtreedict = deepcopy(otreedict) + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add", "modify", "replace"]: + assert_append(update_mode) + + # Write empty dictionary + wtreedict = dict() + + reset_file() + 
etreedict = deepcopy(otreedict) + for update_mode in [None, "add", "modify", "replace"]: + assert_append(update_mode) + + # Modified dataset + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]} + wtreedict["group2"]["subgroup2"]["dset2"] = [10, 20] + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + etreedict["group2"]["subgroup2"]["dset2"] = [10, 20] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = dict() + etreedict["group2"]["subgroup2"]["dset1"] = {"dset3": [10, 20]} + etreedict["group2"]["subgroup2"]["dset2"] = [10, 20] + assert_append("replace") + + # Modified group + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = [0, 1] + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add", "modify"]: + assert_append(update_mode) + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = [0, 1] + assert_append("replace") + + # Modified attribute + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified" + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + etreedict["group2"]["subgroup2"][("dset1", "attr1")] = "modified" + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = dict() + etreedict["group2"]["subgroup2"]["dset1"] = dict() + etreedict["group2"]["subgroup2"]["dset1"][("", "attr1")] = "modified" + assert_append("replace") + + # Delete group + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = None + + reset_file() 
+ etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + del etreedict["group2"]["subgroup2"] + del etreedict["group2"][("subgroup2", "attr1")] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + assert_append("replace") + + # Delete dataset + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"]["dset2"] = None + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + del etreedict["group2"]["subgroup2"]["dset2"] + del etreedict["group2"]["subgroup2"][("dset2", "attr1")] + del etreedict["group2"]["subgroup2"][("dset2", "attr2")] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = dict() + assert_append("replace") + + # Delete attribute + wtreedict = dict() + wtreedict["group2"] = dict() + wtreedict["group2"]["subgroup2"] = dict() + wtreedict["group2"]["subgroup2"][("dset2", "attr1")] = None + + reset_file() + etreedict = deepcopy(otreedict) + for update_mode in [None, "add"]: + assert_append(update_mode) + + del etreedict["group2"]["subgroup2"][("dset2", "attr1")] + assert_append("modify") + + etreedict["group2"] = dict() + del etreedict[("group2", "attr1")] + etreedict["group2"]["subgroup2"] = dict() + etreedict["group2"]["subgroup2"]["dset2"] = dict() + assert_append("replace") + + +class TestH5ToDict(H5DictTestCase): + def setUp(self): + self.tempdir = tempfile.mkdtemp() + self.h5_fname = os.path.join(self.tempdir, "cityattrs.h5") + self.h5_ext_fname = os.path.join(self.tempdir, ext_filename) + dicttoh5(city_attrs, self.h5_fname) + dicttoh5(link_attrs, self.h5_fname, mode="a") + dicttoh5(ext_attrs, self.h5_ext_fname) + + def tearDown(self): + if os.path.exists(self.h5_fname): + os.unlink(self.h5_fname) + if os.path.exists(self.h5_ext_fname): + 
os.unlink(self.h5_ext_fname) + os.rmdir(self.tempdir) + + def testExcludeNames(self): + ddict = h5todict(self.h5_fname, path="/Europe/France", + exclude_names=["ourcoing", "inhab", "toto"]) + self.assertNotIn("Tourcoing", ddict) + self.assertIn("Grenoble", ddict) + + self.assertNotIn("inhabitants", ddict["Grenoble"]) + self.assertIn("coordinates", ddict["Grenoble"]) + self.assertIn("area", ddict["Grenoble"]) + + def testAsArrayTrue(self): + """Test with asarray=True, the default""" + ddict = h5todict(self.h5_fname, path="/Europe/France/Grenoble") + self.assertTrue(numpy.array_equal(ddict["inhabitants"], numpy.array(inhabitants))) + + def testAsArrayFalse(self): + """Test with asarray=False""" + ddict = h5todict(self.h5_fname, path="/Europe/France/Grenoble", asarray=False) + self.assertEqual(ddict["inhabitants"], inhabitants) + + def testDereferenceLinks(self): + ddict = h5todict(self.h5_fname, path="links", dereference_links=True) + self.assertTrue(ddict["absolute_softlink"], 10) + self.assertTrue(ddict["relative_softlink"], 10) + self.assertTrue(ddict["external_link"], 10) + self.assertTrue(ddict["group"]["relative_softlink"], 10) + + def testPreserveLinks(self): + ddict = h5todict(self.h5_fname, path="links", dereference_links=False) + self.assertTrue(is_link(ddict["absolute_softlink"])) + self.assertTrue(is_link(ddict["relative_softlink"])) + self.assertTrue(is_link(ddict["external_link"])) + self.assertTrue(is_link(ddict["group"]["relative_softlink"])) + + def testStrings(self): + ddict = {"dset_bytes": b"bytes", + "dset_utf8": "utf8", + "dset_2bytes": [b"bytes", b"bytes"], + "dset_2utf8": ["utf8", "utf8"], + ("", "attr_bytes"): b"bytes", + ("", "attr_utf8"): "utf8", + ("", "attr_2bytes"): [b"bytes", b"bytes"], + ("", "attr_2utf8"): ["utf8", "utf8"]} + dicttoh5(ddict, self.h5_fname, mode="w") + adict = h5todict(self.h5_fname, include_attributes=True, asarray=False) + self.assertEqual(ddict["dset_bytes"], adict["dset_bytes"]) + 
self.assertEqual(ddict["dset_utf8"], adict["dset_utf8"]) + self.assertEqual(ddict[("", "attr_bytes")], adict[("", "attr_bytes")]) + self.assertEqual(ddict[("", "attr_utf8")], adict[("", "attr_utf8")]) + numpy.testing.assert_array_equal(ddict["dset_2bytes"], adict["dset_2bytes"]) + numpy.testing.assert_array_equal(ddict["dset_2utf8"], adict["dset_2utf8"]) + numpy.testing.assert_array_equal(ddict[("", "attr_2bytes")], adict[("", "attr_2bytes")]) + numpy.testing.assert_array_equal(ddict[("", "attr_2utf8")], adict[("", "attr_2utf8")]) + + +class TestDictToNx(H5DictTestCase): + def setUp(self): + self.tempdir = tempfile.mkdtemp() + self.h5_fname = os.path.join(self.tempdir, "nx.h5") + self.h5_ext_fname = os.path.join(self.tempdir, "nx_ext.h5") + + def tearDown(self): + if os.path.exists(self.h5_fname): + os.unlink(self.h5_fname) + if os.path.exists(self.h5_ext_fname): + os.unlink(self.h5_ext_fname) + os.rmdir(self.tempdir) + + def testAttributes(self): + """Any kind of attribute can be described""" + ddict = { + "group": {"dataset": 100, "@group_attr1": 10}, + "dataset": 200, + "@root_attr": 11, + "dataset@dataset_attr": "12", + "group@group_attr2": 13, + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttonx(ddict, h5file) + self.assertEqual(h5file["group"].attrs['group_attr1'], 10) + self.assertEqual(h5file.attrs['root_attr'], 11) + self.assertEqual(h5file["dataset"].attrs['dataset_attr'], "12") + self.assertEqual(h5file["group"].attrs['group_attr2'], 13) + + def testKeyOrder(self): + ddict1 = { + "d": "plow", + "d@a": "ox", + } + ddict2 = { + "d@a": "ox", + "d": "plow", + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttonx(ddict1, h5file, h5path="g1") + dictdump.dicttonx(ddict2, h5file, h5path="g2") + self.assertEqual(h5file["g1/d"].attrs['a'], "ox") + self.assertEqual(h5file["g2/d"].attrs['a'], "ox") + + def testAttributeValues(self): + """Any NX data types can be used""" + ddict = { + "@bool": True, + "@int": 11, + "@float": 1.1, + 
"@str": "a", + "@boollist": [True, False, True], + "@intlist": [11, 22, 33], + "@floatlist": [1.1, 2.2, 3.3], + "@strlist": ["a", "bb", "ccc"], + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttonx(ddict, h5file) + for k, expected in ddict.items(): + result = h5file.attrs[k[1:]] + if isinstance(expected, list): + if isinstance(expected[0], str): + numpy.testing.assert_array_equal(result, expected) + else: + numpy.testing.assert_array_almost_equal(result, expected) + else: + self.assertEqual(result, expected) + + def testFlatDict(self): + """Description of a tree with a single level of keys""" + ddict = { + "group/group/dataset": 10, + "group/group/dataset@attr": 11, + "group/group@attr": 12, + } + with h5py.File(self.h5_fname, "w") as h5file: + dictdump.dicttonx(ddict, h5file) + self.assertEqual(h5file["group/group/dataset"][()], 10) + self.assertEqual(h5file["group/group/dataset"].attrs['attr'], 11) + self.assertEqual(h5file["group/group"].attrs['attr'], 12) + + def testLinks(self): + ddict = {"ext_group": {"dataset": 10}} + dictdump.dicttonx(ddict, self.h5_ext_fname) + ddict = {"links": {"group": {"dataset": 10, ">relative_softlink": "dataset"}, + ">relative_softlink": "group/dataset", + ">absolute_softlink": "/links/group/dataset", + ">external_link": "nx_ext.h5::/ext_group/dataset"}} + dictdump.dicttonx(ddict, self.h5_fname) + with h5py.File(self.h5_fname, "r") as h5file: + self.assertEqual(h5file["links/group/dataset"][()], 10) + self.assertEqual(h5file["links/group/relative_softlink"][()], 10) + self.assertEqual(h5file["links/relative_softlink"][()], 10) + self.assertEqual(h5file["links/absolute_softlink"][()], 10) + self.assertEqual(h5file["links/external_link"][()], 10) + + def testUpLinks(self): + ddict = {"data": {"group": {"dataset": 10, ">relative_softlink": "dataset"}}, + "links": {"group": {"subgroup": {">relative_softlink": "../../../data/group/dataset"}}}} + dictdump.dicttonx(ddict, self.h5_fname) + with h5py.File(self.h5_fname, 
"r") as h5file: + self.assertEqual(h5file["/links/group/subgroup/relative_softlink"][()], 10) + + def testOverwrite(self): + entry_name = "entry" + wtreedict = { + "group1": {"a": 1, "b": 2}, + "group2@attr3": "attr3", + "group2@attr4": "attr4", + "group2": { + "@attr1": "attr1", + "@attr2": "attr2", + "c": 3, + "d": 4, + "dataset4": 8, + "dataset4@units": "keV", + }, + "group3": {"subgroup": {"e": 9, "f": 10}}, + "dataset1": 5, + "dataset2": 6, + "dataset3": 7, + "dataset3@units": "mm", + } + esubtree = { + "@NX_class": "NXentry", + "group1": {"@NX_class": "NXcollection", "a": 1, "b": 2}, + "group2": { + "@NX_class": "NXcollection", + "@attr1": "attr1", + "@attr2": "attr2", + "@attr3": "attr3", + "@attr4": "attr4", + "c": 3, + "d": 4, + "dataset4": 8, + "dataset4@units": "keV", + }, + "group3": { + "@NX_class": "NXcollection", + "subgroup": {"@NX_class": "NXcollection", "e": 9, "f": 10}, + }, + "dataset1": 5, + "dataset2": 6, + "dataset3": 7, + "dataset3@units": "mm", + } + etreedict = {entry_name: esubtree} + + def append_file(update_mode, add_nx_class): + dictdump.dicttonx( + wtreedict, + h5file=self.h5_fname, + mode="a", + h5path=entry_name, + update_mode=update_mode, + add_nx_class=add_nx_class + ) + + def assert_file(): + rtreedict = dictdump.nxtodict( + self.h5_fname, + include_attributes=True, + asarray=False, + ) + netreedict = self.dictRoundTripNormalize(etreedict) + try: + self.assertRecursiveEqual(netreedict, rtreedict) + except AssertionError: + from pprint import pprint + print("\nDUMP:") + pprint(wtreedict) + print("\nEXPECTED:") + pprint(netreedict) + print("\nHDF5:") + pprint(rtreedict) + raise + + def assert_append(update_mode, add_nx_class=None): + append_file(update_mode, add_nx_class=add_nx_class) + assert_file() + + # First to an empty file + assert_append(None) + + # Add non-existing attributes/datasets/groups + wtreedict["group1"].pop("a") + wtreedict["group2"].pop("@attr1") + wtreedict["group2"]["@attr2"] = "attr3" # only for update + 
wtreedict["group2"]["@type"] = "test" + wtreedict["group2"]["dataset4"] = 9 # only for update + del wtreedict["group2"]["dataset4@units"] + wtreedict["group3"] = {} + esubtree["group2"]["@type"] = "test" + assert_append("add") + + # Add update existing attributes and datasets + esubtree["group2"]["@attr2"] = "attr3" + esubtree["group2"]["dataset4"] = 9 + assert_append("modify") + + # Do not add missing NX_class by default when updating + wtreedict["group2"]["@NX_class"] = "NXprocess" + esubtree["group2"]["@NX_class"] = "NXprocess" + assert_append("modify") + del wtreedict["group2"]["@NX_class"] + assert_append("modify") + + # Overwrite existing groups/datasets/attributes + esubtree["group1"].pop("a") + esubtree["group2"].pop("@attr1") + esubtree["group2"]["@NX_class"] = "NXcollection" + esubtree["group2"]["dataset4"] = 9 + del esubtree["group2"]["dataset4@units"] + esubtree["group3"] = {"@NX_class": "NXcollection"} + assert_append("replace", add_nx_class=True) + + +class TestNxToDict(H5DictTestCase): + def setUp(self): + self.tempdir = tempfile.mkdtemp() + self.h5_fname = os.path.join(self.tempdir, "nx.h5") + self.h5_ext_fname = os.path.join(self.tempdir, "nx_ext.h5") + + def tearDown(self): + if os.path.exists(self.h5_fname): + os.unlink(self.h5_fname) + if os.path.exists(self.h5_ext_fname): + os.unlink(self.h5_ext_fname) + os.rmdir(self.tempdir) + + def testAttributes(self): + """Any kind of attribute can be described""" + ddict = { + "group": {"dataset": 100, "@group_attr1": 10}, + "dataset": 200, + "@root_attr": 11, + "dataset@dataset_attr": "12", + "group@group_attr2": 13, + } + dictdump.dicttonx(ddict, self.h5_fname) + ddict = dictdump.nxtodict(self.h5_fname, include_attributes=True) + self.assertEqual(ddict["group"]["@group_attr1"], 10) + self.assertEqual(ddict["@root_attr"], 11) + self.assertEqual(ddict["dataset@dataset_attr"], "12") + self.assertEqual(ddict["group"]["@group_attr2"], 13) + + def testDereferenceLinks(self): + """Write links and dereference 
on read""" + ddict = {"ext_group": {"dataset": 10}} + dictdump.dicttonx(ddict, self.h5_ext_fname) + ddict = {"links": {"group": {"dataset": 10, ">relative_softlink": "dataset"}, + ">relative_softlink": "group/dataset", + ">absolute_softlink": "/links/group/dataset", + ">external_link": "nx_ext.h5::/ext_group/dataset"}} + dictdump.dicttonx(ddict, self.h5_fname) + + ddict = dictdump.h5todict(self.h5_fname, dereference_links=True) + self.assertTrue(ddict["links"]["absolute_softlink"], 10) + self.assertTrue(ddict["links"]["relative_softlink"], 10) + self.assertTrue(ddict["links"]["external_link"], 10) + self.assertTrue(ddict["links"]["group"]["relative_softlink"], 10) + + def testPreserveLinks(self): + """Write/read links""" + ddict = {"ext_group": {"dataset": 10}} + dictdump.dicttonx(ddict, self.h5_ext_fname) + ddict = {"links": {"group": {"dataset": 10, ">relative_softlink": "dataset"}, + ">relative_softlink": "group/dataset", + ">absolute_softlink": "/links/group/dataset", + ">external_link": "nx_ext.h5::/ext_group/dataset"}} + dictdump.dicttonx(ddict, self.h5_fname) + + ddict = dictdump.nxtodict(self.h5_fname, dereference_links=False) + self.assertTrue(ddict["links"][">absolute_softlink"], "dataset") + self.assertTrue(ddict["links"][">relative_softlink"], "group/dataset") + self.assertTrue(ddict["links"][">external_link"], "/links/group/dataset") + self.assertTrue(ddict["links"]["group"][">relative_softlink"], "nx_ext.h5::/ext_group/datase") + + def testNotExistingPath(self): + """Test converting not existing path""" + with h5py.File(self.h5_fname, 'a') as f: + f['data'] = 1 + + ddict = h5todict(self.h5_fname, path="/I/am/not/a/path", errors='ignore') + self.assertFalse(ddict) + + with LoggingValidator(dictdump_logger, error=1): + ddict = h5todict(self.h5_fname, path="/I/am/not/a/path", errors='log') + self.assertFalse(ddict) + + with self.assertRaises(KeyError): + h5todict(self.h5_fname, path="/I/am/not/a/path", errors='raise') + + def testBrokenLinks(self): + 
class TestDictToJson(DictTestCase):
    """Dump a nested dictionary to a JSON file and inspect the raw text."""

    def setUp(self):
        # One scratch directory per test; removed again in tearDown.
        self.dir_path = tempfile.mkdtemp()
        self.json_fname = os.path.join(self.dir_path, "cityattrs.json")

    def tearDown(self):
        os.unlink(self.json_fname)
        os.rmdir(self.dir_path)

    def testJsonCityAttrs(self):
        self.json_fname = os.path.join(self.dir_path, "cityattrs.json")
        dicttojson(city_attrs, self.json_fname, indent=3)

        # The serialized text must contain the leaf value verbatim.
        with open(self.json_fname, "r") as fp:
            written = fp.read()
        self.assertIn('"inhabitants": 160215', written)
list(testdict.keys()) + readkeys = list(readdict.keys()) + + self.assertTrue(len(readkeys) == len(testdictkeys), + "Number of read keys not equal") + + self.assertEqual(readdict['simple_types']["interpstring"], + "interpolation: 5 % is too much") + + testdict['simple_types']["interpstring"] = "interpolation: 5 % is too much" + + for key in testdict["simple_types"]: + original = testdict['simple_types'][key] + read = readdict['simple_types'][key] + self.assertEqual(read, original, + "Read <%s> instead of <%s>" % (read, original)) + + for key in testdict["containers"]: + original = testdict["containers"][key] + read = readdict["containers"][key] + if key == 'array': + self.assertEqual(read.all(), original.all(), + "Read <%s> instead of <%s>" % (read, original)) + else: + self.assertEqual(read, original, + "Read <%s> instead of <%s>" % (read, original)) + + def testConfigDictOrder(self): + """Ensure order is preserved when dictionary is + written to file and read back.""" + test_dict = {'banana': 3, 'apple': 4, 'pear': 1, 'orange': 2} + # sort by key + test_ordered_dict1 = OrderedDict(sorted(test_dict.items(), + key=lambda t: t[0])) + # sort by value + test_ordered_dict2 = OrderedDict(sorted(test_dict.items(), + key=lambda t: t[1])) + # add the two ordered dict as sections of a third ordered dict + test_ordered_dict3 = OrderedDict() + test_ordered_dict3["section1"] = test_ordered_dict1 + test_ordered_dict3["section2"] = test_ordered_dict2 + + # write to ini and read back as a ConfigDict (inherits OrderedDict) + dump(test_ordered_dict3, + self.ini_fname, fmat="ini") + read_instance = ConfigDict() + read_instance.read(self.ini_fname) + + # loop through original and read-back dictionaries, + # test identical order for key/value pairs + for orig_key, section in zip(test_ordered_dict3.keys(), + read_instance.keys()): + self.assertEqual(orig_key, section) + for orig_key2, read_key in zip(test_ordered_dict3[section].keys(), + read_instance[section].keys()): + 
self.assertEqual(orig_key2, read_key) + self.assertEqual(test_ordered_dict3[section][orig_key2], + read_instance[section][read_key]) diff --git a/src/silx/io/test/test_fabioh5.py b/src/silx/io/test/test_fabioh5.py new file mode 100755 index 0000000..c410024 --- /dev/null +++ b/src/silx/io/test/test_fabioh5.py @@ -0,0 +1,615 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2018 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for fabioh5 wrapper""" + +__authors__ = ["V. Valls"] +__license__ = "MIT" +__date__ = "02/07/2018" + +import os +import logging +import numpy +import unittest +import tempfile +import shutil + +_logger = logging.getLogger(__name__) + +import fabio +import h5py + +from .. import commonh5 +from .. 
import fabioh5 + + +class TestFabioH5(unittest.TestCase): + + def setUp(self): + + header = { + "integer": "-100", + "float": "1.0", + "string": "hi!", + "list_integer": "100 50 0", + "list_float": "1.0 2.0 3.5", + "string_looks_like_list": "2000 hi!", + } + data = numpy.array([[10, 11], [12, 13], [14, 15]], dtype=numpy.int64) + self.fabio_image = fabio.numpyimage.NumpyImage(data, header) + self.h5_image = fabioh5.File(fabio_image=self.fabio_image) + + def test_main_groups(self): + self.assertEqual(self.h5_image.h5py_class, h5py.File) + self.assertEqual(self.h5_image["/"].h5py_class, h5py.File) + self.assertEqual(self.h5_image["/scan_0"].h5py_class, h5py.Group) + self.assertEqual(self.h5_image["/scan_0/instrument"].h5py_class, h5py.Group) + self.assertEqual(self.h5_image["/scan_0/measurement"].h5py_class, h5py.Group) + + def test_wrong_path_syntax(self): + # result tested with a default h5py file + self.assertRaises(ValueError, lambda: self.h5_image[""]) + + def test_wrong_root_name(self): + # result tested with a default h5py file + self.assertRaises(KeyError, lambda: self.h5_image["/foo"]) + + def test_wrong_root_path(self): + # result tested with a default h5py file + self.assertRaises(KeyError, lambda: self.h5_image["/foo/foo"]) + + def test_wrong_name(self): + # result tested with a default h5py file + self.assertRaises(KeyError, lambda: self.h5_image["foo"]) + + def test_wrong_path(self): + # result tested with a default h5py file + self.assertRaises(KeyError, lambda: self.h5_image["foo/foo"]) + + def test_single_frame(self): + data = numpy.arange(2 * 3) + data.shape = 2, 3 + fabio_image = fabio.edfimage.edfimage(data=data) + h5_image = fabioh5.File(fabio_image=fabio_image) + + dataset = h5_image["/scan_0/instrument/detector_0/data"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertTrue(isinstance(dataset[()], numpy.ndarray)) + self.assertEqual(dataset.dtype.kind, "i") + self.assertEqual(dataset.shape, (2, 3)) + 
        self.assertEqual(dataset[...][0, 0], 0)
        self.assertEqual(dataset.attrs["interpretation"], "image")

    def test_multi_frames(self):
        # An EDF image with one appended frame must be exposed as a single
        # stacked dataset of shape (n_frames, height, width).
        data = numpy.arange(2 * 3)
        data.shape = 2, 3
        fabio_image = fabio.edfimage.edfimage(data=data)
        fabio_image.append_frame(data=data)
        h5_image = fabioh5.File(fabio_image=fabio_image)

        dataset = h5_image["/scan_0/instrument/detector_0/data"]
        self.assertEqual(dataset.h5py_class, h5py.Dataset)
        self.assertTrue(isinstance(dataset[()], numpy.ndarray))
        self.assertEqual(dataset.dtype.kind, "i")
        # 2 frames of shape (2, 3) -> stacked shape (2, 2, 3)
        self.assertEqual(dataset.shape, (2, 2, 3))
        self.assertEqual(dataset[...][0, 0, 0], 0)
        self.assertEqual(dataset.attrs["interpretation"], "image")

    def test_heterogeneous_frames(self):
        """Frames containing 2 images with different sizes and a cube"""
        data1 = numpy.arange(2 * 3)
        data1.shape = 2, 3
        data2 = numpy.arange(2 * 5)
        data2.shape = 2, 5
        data3 = numpy.arange(2 * 5 * 1)
        data3.shape = 2, 5, 1
        fabio_image = fabio.edfimage.edfimage(data=data1)
        fabio_image.append_frame(data=data2)
        fabio_image.append_frame(data=data3)
        h5_image = fabioh5.File(fabio_image=fabio_image)

        dataset = h5_image["/scan_0/instrument/detector_0/data"]
        self.assertEqual(dataset.h5py_class, h5py.Dataset)
        self.assertTrue(isinstance(dataset[()], numpy.ndarray))
        self.assertEqual(dataset.dtype.kind, "i")
        # NOTE(review): the expected (3, 2, 5, 1) shape suggests frames are
        # stored in a container sized to the largest frame -- confirm in
        # fabioh5.FrameData before relying on this elsewhere.
        self.assertEqual(dataset.shape, (3, 2, 5, 1))
        self.assertEqual(dataset[...][0, 0, 0], 0)
        self.assertEqual(dataset.attrs["interpretation"], "image")

    def test_single_3d_frame(self):
        """Image source contains a cube"""
        data = numpy.arange(2 * 3 * 4)
        data.shape = 2, 3, 4
        # Do not provide the data to the constructor to avoid slicing of the
In this way the result stay a cube, and not a multi-frame + fabio_image = fabio.edfimage.edfimage() + fabio_image.data = data + h5_image = fabioh5.File(fabio_image=fabio_image) + + dataset = h5_image["/scan_0/instrument/detector_0/data"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertTrue(isinstance(dataset[()], numpy.ndarray)) + self.assertEqual(dataset.dtype.kind, "i") + self.assertEqual(dataset.shape, (2, 3, 4)) + self.assertEqual(dataset[...][0, 0, 0], 0) + self.assertEqual(dataset.attrs["interpretation"], "image") + + def test_metadata_int(self): + dataset = self.h5_image["/scan_0/instrument/detector_0/others/integer"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertEqual(dataset[()], -100) + self.assertEqual(dataset.dtype.kind, "i") + self.assertEqual(dataset.shape, (1,)) + + def test_metadata_float(self): + dataset = self.h5_image["/scan_0/instrument/detector_0/others/float"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertEqual(dataset[()], 1.0) + self.assertEqual(dataset.dtype.kind, "f") + self.assertEqual(dataset.shape, (1,)) + + def test_metadata_string(self): + dataset = self.h5_image["/scan_0/instrument/detector_0/others/string"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertEqual(dataset[()], numpy.string_("hi!")) + self.assertEqual(dataset.dtype.type, numpy.string_) + self.assertEqual(dataset.shape, (1,)) + + def test_metadata_list_integer(self): + dataset = self.h5_image["/scan_0/instrument/detector_0/others/list_integer"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertEqual(dataset.dtype.kind, "u") + self.assertEqual(dataset.shape, (1, 3)) + self.assertEqual(dataset[0, 0], 100) + self.assertEqual(dataset[0, 1], 50) + + def test_metadata_list_float(self): + dataset = self.h5_image["/scan_0/instrument/detector_0/others/list_float"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertEqual(dataset.dtype.kind, "f") + 
self.assertEqual(dataset.shape, (1, 3)) + self.assertEqual(dataset[0, 0], 1.0) + self.assertEqual(dataset[0, 1], 2.0) + + def test_metadata_list_looks_like_list(self): + dataset = self.h5_image["/scan_0/instrument/detector_0/others/string_looks_like_list"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertEqual(dataset[()], numpy.string_("2000 hi!")) + self.assertEqual(dataset.dtype.type, numpy.string_) + self.assertEqual(dataset.shape, (1,)) + + def test_float_32(self): + float_list = [u'1.2', u'1.3', u'1.4'] + data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8) + fabio_image = None + for float_item in float_list: + header = {"float_item": float_item} + if fabio_image is None: + fabio_image = fabio.edfimage.EdfImage(data=data, header=header) + else: + fabio_image.append_frame(data=data, header=header) + h5_image = fabioh5.File(fabio_image=fabio_image) + data = h5_image["/scan_0/instrument/detector_0/others/float_item"] + # There is no equality between items + self.assertEqual(len(data), len(set(data))) + # At worst a float32 + self.assertIn(data.dtype.kind, ['d', 'f']) + self.assertLessEqual(data.dtype.itemsize, 32 / 8) + + def test_float_64(self): + float_list = [ + u'1469117129.082226', + u'1469117136.684986', u'1469117144.312749', u'1469117151.892507', + u'1469117159.474265', u'1469117167.100027', u'1469117174.815799', + u'1469117182.437561', u'1469117190.094326', u'1469117197.721089'] + data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8) + fabio_image = None + for float_item in float_list: + header = {"time_of_day": float_item} + if fabio_image is None: + fabio_image = fabio.edfimage.EdfImage(data=data, header=header) + else: + fabio_image.append_frame(data=data, header=header) + h5_image = fabioh5.File(fabio_image=fabio_image) + data = h5_image["/scan_0/instrument/detector_0/others/time_of_day"] + # There is no equality between items + self.assertEqual(len(data), len(set(data))) + # At least a float64 + self.assertIn(data.dtype.kind, 
['d', 'f']) + self.assertGreaterEqual(data.dtype.itemsize, 64 / 8) + + def test_mixed_float_size__scalar(self): + # We expect to have a precision of 32 bits + float_list = [u'1.2', u'1.3001'] + expected_float_result = [1.2, 1.3001] + data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8) + fabio_image = None + for float_item in float_list: + header = {"float_item": float_item} + if fabio_image is None: + fabio_image = fabio.edfimage.EdfImage(data=data, header=header) + else: + fabio_image.append_frame(data=data, header=header) + h5_image = fabioh5.File(fabio_image=fabio_image) + data = h5_image["/scan_0/instrument/detector_0/others/float_item"] + # At worst a float32 + self.assertIn(data.dtype.kind, ['d', 'f']) + self.assertLessEqual(data.dtype.itemsize, 32 / 8) + for computed, expected in zip(data, expected_float_result): + numpy.testing.assert_almost_equal(computed, expected, 5) + + def test_mixed_float_size__list(self): + # We expect to have a precision of 32 bits + float_list = [u'1.2 1.3001'] + expected_float_result = numpy.array([[1.2, 1.3001]]) + data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8) + fabio_image = None + for float_item in float_list: + header = {"float_item": float_item} + if fabio_image is None: + fabio_image = fabio.edfimage.EdfImage(data=data, header=header) + else: + fabio_image.append_frame(data=data, header=header) + h5_image = fabioh5.File(fabio_image=fabio_image) + data = h5_image["/scan_0/instrument/detector_0/others/float_item"] + # At worst a float32 + self.assertIn(data.dtype.kind, ['d', 'f']) + self.assertLessEqual(data.dtype.itemsize, 32 / 8) + for computed, expected in zip(data, expected_float_result): + numpy.testing.assert_almost_equal(computed, expected, 5) + + def test_mixed_float_size__list_of_list(self): + # We expect to have a precision of 32 bits + float_list = [u'1.2 1.3001', u'1.3001 1.3001'] + expected_float_result = numpy.array([[1.2, 1.3001], [1.3001, 1.3001]]) + data = numpy.array([[0, 0], [0, 0]], 
dtype=numpy.int8) + fabio_image = None + for float_item in float_list: + header = {"float_item": float_item} + if fabio_image is None: + fabio_image = fabio.edfimage.EdfImage(data=data, header=header) + else: + fabio_image.append_frame(data=data, header=header) + h5_image = fabioh5.File(fabio_image=fabio_image) + data = h5_image["/scan_0/instrument/detector_0/others/float_item"] + # At worst a float32 + self.assertIn(data.dtype.kind, ['d', 'f']) + self.assertLessEqual(data.dtype.itemsize, 32 / 8) + for computed, expected in zip(data, expected_float_result): + numpy.testing.assert_almost_equal(computed, expected, 5) + + def test_ub_matrix(self): + """Data from mediapix.edf""" + header = {} + header["UB_mne"] = 'UB0 UB1 UB2 UB3 UB4 UB5 UB6 UB7 UB8' + header["UB_pos"] = '1.99593e-16 2.73682e-16 -1.54 -1.08894 1.08894 1.6083e-16 1.08894 1.08894 9.28619e-17' + header["sample_mne"] = 'U0 U1 U2 U3 U4 U5' + header["sample_pos"] = '4.08 4.08 4.08 90 90 90' + data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8) + fabio_image = fabio.edfimage.EdfImage(data=data, header=header) + h5_image = fabioh5.File(fabio_image=fabio_image) + sample = h5_image["/scan_0/sample"] + self.assertIsNotNone(sample) + self.assertEqual(sample.attrs["NXclass"], "NXsample") + + d = sample['unit_cell_abc'] + expected = numpy.array([4.08, 4.08, 4.08]) + self.assertIsNotNone(d) + self.assertEqual(d.shape, (3, )) + self.assertIn(d.dtype.kind, ['d', 'f']) + numpy.testing.assert_array_almost_equal(d[...], expected) + + d = sample['unit_cell_alphabetagamma'] + expected = numpy.array([90.0, 90.0, 90.0]) + self.assertIsNotNone(d) + self.assertEqual(d.shape, (3, )) + self.assertIn(d.dtype.kind, ['d', 'f']) + numpy.testing.assert_array_almost_equal(d[...], expected) + + d = sample['ub_matrix'] + expected = numpy.array([[[1.99593e-16, 2.73682e-16, -1.54], + [-1.08894, 1.08894, 1.6083e-16], + [1.08894, 1.08894, 9.28619e-17]]]) + self.assertIsNotNone(d) + self.assertEqual(d.shape, (1, 3, 3)) + 
        self.assertIn(d.dtype.kind, ['d', 'f'])
        numpy.testing.assert_array_almost_equal(d[...], expected)

    def test_interpretation_mca_edf(self):
        """EDF files with two or more headers starting with "MCA"
        must have @interpretation = "spectrum" on the data."""
        header = {
            "Title": "zapimage samy -4.975 -5.095 80 500 samz -4.091 -4.171 70 0",
            "MCA a": -23.812,
            "MCA b": 2.7107,
            "MCA c": 8.1164e-06}

        data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8)
        fabio_image = fabio.edfimage.EdfImage(data=data, header=header)
        h5_image = fabioh5.File(fabio_image=fabio_image)

        # The "spectrum" interpretation must be visible through every path
        # that exposes this detector data.
        data_dataset = h5_image["/scan_0/measurement/image_0/data"]
        self.assertEqual(data_dataset.attrs["interpretation"], "spectrum")

        data_dataset = h5_image["/scan_0/instrument/detector_0/data"]
        self.assertEqual(data_dataset.attrs["interpretation"], "spectrum")

        data_dataset = h5_image["/scan_0/measurement/image_0/info/data"]
        self.assertEqual(data_dataset.attrs["interpretation"], "spectrum")

    def test_get_api(self):
        # Exercise the 4 combinations of h5py's Group.get(getclass, getlink).
        result = self.h5_image.get("scan_0", getclass=True, getlink=True)
        self.assertIs(result, h5py.HardLink)
        result = self.h5_image.get("scan_0", getclass=False, getlink=True)
        self.assertIsInstance(result, h5py.HardLink)
        result = self.h5_image.get("scan_0", getclass=True, getlink=False)
        self.assertIs(result, h5py.Group)
        result = self.h5_image.get("scan_0", getclass=False, getlink=False)
        self.assertIsInstance(result, commonh5.Group)

    def test_detector_link(self):
        # The detector group is reachable from two paths: both must expose
        # the same children, and the second must resolve as a link whose
        # target path is the first.
        detector1 = self.h5_image["/scan_0/instrument/detector_0"]
        detector2 = self.h5_image["/scan_0/measurement/image_0/info"]
        self.assertIsNot(detector1, detector2)
        self.assertEqual(list(detector1.items()), list(detector2.items()))
        self.assertEqual(self.h5_image.get(detector2.name, getlink=True).path, detector1.name)

    def test_detector_data_link(self):
        data1 = self.h5_image["/scan_0/instrument/detector_0/data"]
        data2 = self.h5_image["/scan_0/measurement/image_0/data"]
self.assertIsNot(data1, data2) + self.assertIs(data1._get_data(), data2._get_data()) + self.assertEqual(self.h5_image.get(data2.name, getlink=True).path, data1.name) + + def test_dirty_header(self): + """Test that it does not fail""" + try: + header = {} + header["foo"] = b'abc' + data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8) + fabio_image = fabio.edfimage.edfimage(data=data, header=header) + header = {} + header["foo"] = b'a\x90bc\xFE' + fabio_image.append_frame(data=data, header=header) + except Exception as e: + _logger.error(e.args[0]) + _logger.debug("Backtrace", exc_info=True) + self.skipTest("fabio do not allow to create the resource") + + h5_image = fabioh5.File(fabio_image=fabio_image) + scan_header_path = "/scan_0/instrument/file/scan_header" + self.assertIn(scan_header_path, h5_image) + data = h5_image[scan_header_path] + self.assertIsInstance(data[...], numpy.ndarray) + + def test_unicode_header(self): + """Test that it does not fail""" + try: + header = {} + header["foo"] = b'abc' + data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8) + fabio_image = fabio.edfimage.edfimage(data=data, header=header) + header = {} + header["foo"] = u'abc\u2764' + fabio_image.append_frame(data=data, header=header) + except Exception as e: + _logger.error(e.args[0]) + _logger.debug("Backtrace", exc_info=True) + self.skipTest("fabio do not allow to create the resource") + + h5_image = fabioh5.File(fabio_image=fabio_image) + scan_header_path = "/scan_0/instrument/file/scan_header" + self.assertIn(scan_header_path, h5_image) + data = h5_image[scan_header_path] + self.assertIsInstance(data[...], numpy.ndarray) + + +class TestFabioH5MultiFrames(unittest.TestCase): + + @classmethod + def setUpClass(cls): + + names = ["A", "B", "C", "D"] + values = [["32000", "-10", "5.0", "1"], + ["-32000", "-10", "5.0", "1"]] + + fabio_file = None + + for i in range(10): + header = { + "image_id": "%d" % i, + "integer": "-100", + "float": "1.0", + "string": "hi!", + "list_integer": 
                "100 50 0",
                "list_float": "1.0 2.0 3.5",
                "string_looks_like_list": "2000 hi!",
                "motor_mne": " ".join(names),
                "motor_pos": " ".join(values[i % len(values)]),
                "counter_mne": " ".join(names),
                "counter_pos": " ".join(values[i % len(values)])
            }
            for iname, name in enumerate(names):
                header[name] = values[i % len(values)][iname]

            data = numpy.array([[i, 11], [12, 13], [14, 15]], dtype=numpy.int64)
            if fabio_file is None:
                fabio_file = fabio.edfimage.EdfImage(data=data, header=header)
            else:
                fabio_file.append_frame(data=data, header=header)

        cls.fabio_file = fabio_file
        cls.fabioh5 = fabioh5.File(fabio_image=fabio_file)

    def test_others(self):
        # Raw header values under "others" keep their naturally parsed dtype:
        # any integer width is acceptable (itemsize >= 1), and "D" (always
        # the string "1" in the fixture) parses as unsigned here, unlike the
        # positioners/counters views below which normalize to signed >= 32 bit.
        others = self.fabioh5["/scan_0/instrument/detector_0/others"]
        dataset = others["A"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 1)
        self.assertEqual(dataset.dtype.kind, "i")
        dataset = others["B"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 1)
        self.assertEqual(dataset.dtype.kind, "i")
        dataset = others["C"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 1)
        self.assertEqual(dataset.dtype.kind, "f")
        dataset = others["D"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 1)
        self.assertEqual(dataset.dtype.kind, "u")

    def test_positioners(self):
        counters = self.fabioh5["/scan_0/instrument/positioners"]
        # At least 32 bits, no unsigned values
        dataset = counters["A"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 4)
        self.assertEqual(dataset.dtype.kind, "i")
        dataset = counters["B"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 4)
        self.assertEqual(dataset.dtype.kind, "i")
        dataset = counters["C"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 4)
        self.assertEqual(dataset.dtype.kind, "f")
        dataset = counters["D"]
        self.assertGreaterEqual(dataset.dtype.itemsize, 4)
        self.assertEqual(dataset.dtype.kind, "i")

    def test_counters(self):
        counters = self.fabioh5["/scan_0/measurement"]
        # At least 32 bits, no unsigned values
        dataset = counters["A"]
self.assertGreaterEqual(dataset.dtype.itemsize, 4) + self.assertEqual(dataset.dtype.kind, "i") + dataset = counters["B"] + self.assertGreaterEqual(dataset.dtype.itemsize, 4) + self.assertEqual(dataset.dtype.kind, "i") + dataset = counters["C"] + self.assertGreaterEqual(dataset.dtype.itemsize, 4) + self.assertEqual(dataset.dtype.kind, "f") + dataset = counters["D"] + self.assertGreaterEqual(dataset.dtype.itemsize, 4) + self.assertEqual(dataset.dtype.kind, "i") + + +class TestFabioH5WithEdf(unittest.TestCase): + + @classmethod + def setUpClass(cls): + + cls.tmp_directory = tempfile.mkdtemp() + + cls.edf_filename = os.path.join(cls.tmp_directory, "test.edf") + + header = { + "integer": "-100", + "float": "1.0", + "string": "hi!", + "list_integer": "100 50 0", + "list_float": "1.0 2.0 3.5", + "string_looks_like_list": "2000 hi!", + } + data = numpy.array([[10, 11], [12, 13], [14, 15]], dtype=numpy.int64) + fabio_image = fabio.edfimage.edfimage(data, header) + fabio_image.write(cls.edf_filename) + + cls.fabio_image = fabio.open(cls.edf_filename) + cls.h5_image = fabioh5.File(fabio_image=cls.fabio_image) + + @classmethod + def tearDownClass(cls): + cls.fabio_image = None + cls.h5_image = None + shutil.rmtree(cls.tmp_directory) + + def test_reserved_format_metadata(self): + if fabio.hexversion < 327920: # 0.5.0 final + self.skipTest("fabio >= 0.5.0 final is needed") + + # The EDF contains reserved keys in the header + self.assertIn("HeaderID", self.fabio_image.header) + # We do not expose them in FabioH5 + self.assertNotIn("/scan_0/instrument/detector_0/others/HeaderID", self.h5_image) + + +class _TestableFrameData(fabioh5.FrameData): + """Allow to test if the full data is reached.""" + def _create_data(self): + raise RuntimeError("Not supposed to be called") + + +class TestFabioH5WithFileSeries(unittest.TestCase): + + @classmethod + def setUpClass(cls): + + cls.tmp_directory = tempfile.mkdtemp() + + cls.edf_filenames = [] + + for i in range(10): + filename = 
os.path.join(cls.tmp_directory, "test_%04d.edf" % i) + cls.edf_filenames.append(filename) + + header = { + "image_id": "%d" % i, + "integer": "-100", + "float": "1.0", + "string": "hi!", + "list_integer": "100 50 0", + "list_float": "1.0 2.0 3.5", + "string_looks_like_list": "2000 hi!", + } + data = numpy.array([[i, 11], [12, 13], [14, 15]], dtype=numpy.int64) + fabio_image = fabio.edfimage.edfimage(data, header) + fabio_image.write(filename) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.tmp_directory) + + def _testH5Image(self, h5_image): + # test data + dataset = h5_image["/scan_0/instrument/detector_0/data"] + self.assertEqual(dataset.h5py_class, h5py.Dataset) + self.assertTrue(isinstance(dataset[()], numpy.ndarray)) + self.assertEqual(dataset.dtype.kind, "i") + self.assertEqual(dataset.shape, (10, 3, 2)) + self.assertEqual(list(dataset[:, 0, 0]), list(range(10))) + self.assertEqual(dataset.attrs["interpretation"], "image") + # test metatdata + dataset = h5_image["/scan_0/instrument/detector_0/others/image_id"] + self.assertEqual(list(dataset[...]), list(range(10))) + + def testFileList(self): + h5_image = fabioh5.File(file_series=self.edf_filenames) + self._testH5Image(h5_image) + + def testFileSeries(self): + file_series = fabioh5._FileSeries(self.edf_filenames) + h5_image = fabioh5.File(file_series=file_series) + self._testH5Image(h5_image) + + def testFrameDataCache(self): + file_series = fabioh5._FileSeries(self.edf_filenames) + reader = fabioh5.FabioReader(file_series=file_series) + frameData = _TestableFrameData("foo", reader) + self.assertEqual(frameData.dtype.kind, "i") + self.assertEqual(frameData.shape, (10, 3, 2)) diff --git a/src/silx/io/test/test_fioh5.py b/src/silx/io/test/test_fioh5.py new file mode 100644 index 0000000..8ffb4ad --- /dev/null +++ b/src/silx/io/test/test_fioh5.py @@ -0,0 +1,299 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2021 Timo Fuchs +# +# 
Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for fioh5""" +import numpy +import os +import io +import sys +import tempfile +import unittest +import datetime +import logging + +from silx.utils import testutils + +from .. import fioh5 +from ..fioh5 import (FioH5, FioH5NodeDataset, is_fiofile, logger1, dtypeConverter) + +import h5py + +__authors__ = ["T. Fuchs"] +__license__ = "MIT" +__date__ = "15/10/2021" + +fioftext = """ +! +! Comments +! +%c +ascan omega 180.0 180.5 3:10/1 4 +user username, acquisition started at Thu Dec 12 18:00:00 2021 +sweep motor lag: 1.0e-03 +channel 3: Detector +! +! Parameter +! +%p +channel3_exposure = 1.000000e+00 +ScanName = ascan +! +! Data +! 
+%d + Col 1 omega(encoder) DOUBLE + Col 2 channel INTEGER + Col 3 filename STRING + Col 4 type STRING + Col 5 unix time DOUBLE + Col 6 enable BOOLEAN + Col 7 time_s FLOAT + 179.998418821 3 00001 exposure 1576165741.20308 1 1.243 + 180.048418821 3 00002 exposure 1576165742.20308 1 1.243 + 180.098418821 3 00003 exposure 1576165743.20308 1 1.243 + 180.148418821 3 00004 exposure 1576165744.20308 1 1.243 + 180.198418821 3 00005 exposure 1576165745.20308 1 1.243 + 180.248418821 3 00006 exposure 1576165746.20308 1 1.243 + 180.298418821 3 00007 exposure 1576165747.20308 1 1.243 + 180.348418821 3 00008 exposure 1576165748.20308 1 1.243 + 180.398418821 3 00009 exposure 1576165749.20308 1 1.243 + 180.448418821 3 00010 exposure 1576165750.20308 1 1.243 +""" + + + +class TestFioH5(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.temp_dir = tempfile.TemporaryDirectory() + #fd, cls.fname = tempfile.mkstemp() + cls.fname_numbered = os.path.join(cls.temp_dir.name, "eh1scan_00005.fio") + + with open(cls.fname_numbered, 'w') as fiof: + fiof.write(fioftext) + + @classmethod + def tearDownClass(cls): + cls.temp_dir.cleanup() + del cls.temp_dir + + def setUp(self): + self.fioh5 = FioH5(self.fname_numbered) + + def tearDown(self): + self.fioh5.close() + + def testScanNumber(self): + # scan number is parsed from the file name. 
+ self.assertIn("/5.1", self.fioh5) + self.assertIn("5.1", self.fioh5) + + def testContainsFile(self): + self.assertIn("/5.1/measurement", self.fioh5) + self.assertNotIn("25.2", self.fioh5) + # measurement is a child of a scan, full path would be required to + # access from root level + self.assertNotIn("measurement", self.fioh5) + # Groups may or may not have a trailing / + self.assertIn("/5.1/measurement/", self.fioh5) + self.assertIn("/5.1/measurement", self.fioh5) + # Datasets can't have a trailing / + self.assertIn("/5.1/measurement/omega(encoder)", self.fioh5) + self.assertNotIn("/5.1/measurement/omega(encoder)/", self.fioh5) + # No gamma + self.assertNotIn("/5.1/measurement/gamma", self.fioh5) + + def testContainsGroup(self): + self.assertIn("measurement", self.fioh5["/5.1/"]) + self.assertIn("measurement", self.fioh5["/5.1"]) + self.assertIn("5.1", self.fioh5["/"]) + self.assertNotIn("5.2", self.fioh5["/"]) + self.assertIn("measurement/filename", self.fioh5["/5.1"]) + # illegal trailing "/" after dataset name + self.assertNotIn("measurement/filename/", + self.fioh5["/5.1"]) + # full path to element in group (OK) + self.assertIn("/5.1/measurement/filename", + self.fioh5["/5.1/measurement"]) + + def testDataType(self): + meas = self.fioh5["/5.1/measurement/"] + self.assertEqual(meas["omega(encoder)"].dtype, dtypeConverter['DOUBLE']) + self.assertEqual(meas["channel"].dtype, dtypeConverter['INTEGER']) + self.assertEqual(meas["filename"].dtype, dtypeConverter['STRING']) + self.assertEqual(meas["time_s"].dtype, dtypeConverter['FLOAT']) + self.assertEqual(meas["enable"].dtype, dtypeConverter['BOOLEAN']) + + def testDataColumn(self): + self.assertAlmostEqual(sum(self.fioh5["/5.1/measurement/omega(encoder)"]), + 1802.23418821) + self.assertTrue(numpy.all(self.fioh5["/5.1/measurement/enable"])) + + # --- comment section tests --- + + def testComment(self): + # should hold the complete comment section + self.assertEqual(self.fioh5["/5.1/instrument/fiofile/comments"], 
+"""ascan omega 180.0 180.5 3:10/1 4 +user username, acquisition started at Thu Dec 12 18:00:00 2021 +sweep motor lag: 1.0e-03 +channel 3: Detector +""") + + def testDate(self): + # there is no convention on how to format the time. So just check its existence. + self.assertEqual(self.fioh5["/5.1/start_time"], + u"Thu Dec 12 18:00:00 2021") + + def testTitle(self): + self.assertEqual(self.fioh5["/5.1/title"], + u"ascan omega 180.0 180.5 3:10/1 4") + + + # --- parameter section tests --- + + def testParameter(self): + # should hold the complete parameter section + self.assertEqual(self.fioh5["/5.1/instrument/fiofile/parameter"], +"""channel3_exposure = 1.000000e+00 +ScanName = ascan +""") + + def testParsedParameter(self): + # no dtype is given, so everything is str. + self.assertEqual(self.fioh5["/5.1/instrument/parameter/channel3_exposure"], + u"1.000000e+00") + self.assertEqual(self.fioh5["/5.1/instrument/parameter/ScanName"], u"ascan") + + def testNotFioH5(self): + testfilename = os.path.join(self.temp_dir.name, "eh1scan_00010.fio") + with open(testfilename, 'w') as fiof: + fiof.write("!Not a fio file!") + + self.assertRaises(IOError, FioH5, testfilename) + + self.assertTrue(is_fiofile(self.fname_numbered)) + self.assertFalse(is_fiofile(testfilename)) + + os.unlink(testfilename) + + +class TestUnnumberedFioH5(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.temp_dir = tempfile.TemporaryDirectory() + cls.fname_nosuffix = os.path.join(cls.temp_dir.name, "eh1scan_nosuffix.fio") + + with open(cls.fname_nosuffix, 'w') as fiof: + fiof.write(fioftext) + + @classmethod + def tearDownClass(cls): + cls.temp_dir.cleanup() + del cls.temp_dir + + def setUp(self): + self.fioh5 = FioH5(self.fname_nosuffix) + + def testLogMissingScanno(self): + with self.assertLogs(logger1,level='WARNING') as cm: + fioh5 = FioH5(self.fname_nosuffix) + self.assertIn("Cannot parse scan number of file", cm.output[0]) + + def testFallbackName(self): + 
self.assertIn("/eh1scan_nosuffix", self.fioh5) + +brokenHeaderText = """ +! +! Comments +! +%c +ascan omega 180.0 180.5 3:10/1 4 +user username, acquisited at Thu Dec 12 100 2021 +sweep motor lavgvf.0e-03 +channel 3: Detector +! +! Parameter +! +%p +channel3_exposu65 1.000000e+00 +ScanName = ascan +! +! Data +! +%d + Col 1 omega(encoder) DOUBLE + Col 2 channel INTEGER + Col 3 filename STRING + Col 4 type STRING + Col 5 unix time DOUBLE + 179.998418821 3 00001 exposure 1576165741.20308 + 180.048418821 3 00002 exposure 1576165742.20308 + 180.098418821 3 00003 exposure 1576165743.20308 + 180.148418821 3 00004 exposure 1576165744.20308 + 180.198418821 3 00005 exposure 1576165745.20308 + 180.248418821 3 00006 exposure 1576165746.20308 + 180.298418821 3 00007 exposure 1576165747.20308 + 180.348418821 3 00008 exposure 1576165748.20308 + 180.398418821 3 00009 exposure 1576165749.20308 + 180.448418821 3 00010 exposure 1576165750.20308 +""" + +class TestBrokenHeaderFioH5(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.temp_dir = tempfile.TemporaryDirectory() + cls.fname_numbered = os.path.join(cls.temp_dir.name, "eh1scan_00005.fio") + + with open(cls.fname_numbered, 'w') as fiof: + fiof.write(brokenHeaderText) + + @classmethod + def tearDownClass(cls): + cls.temp_dir.cleanup() + del cls.temp_dir + + def setUp(self): + self.fioh5 = FioH5(self.fname_numbered) + + def testLogBrokenHeader(self): + with self.assertLogs(logger1,level='WARNING') as cm: + fioh5 = FioH5(self.fname_numbered) + self.assertIn("Cannot parse parameter section", cm.output[0]) + self.assertIn("Cannot parse default comment section", cm.output[1]) + + def testComment(self): + # should hold the complete comment section + self.assertEqual(self.fioh5["/5.1/instrument/fiofile/comments"], +"""ascan omega 180.0 180.5 3:10/1 4 +user username, acquisited at Thu Dec 12 100 2021 +sweep motor lavgvf.0e-03 +channel 3: Detector +""") + + def testParameter(self): + # should hold the complete parameter 
section + self.assertEqual(self.fioh5["/5.1/instrument/fiofile/parameter"], +"""channel3_exposu65 1.000000e+00 +ScanName = ascan +""") diff --git a/src/silx/io/test/test_h5py_utils.py b/src/silx/io/test/test_h5py_utils.py new file mode 100644 index 0000000..ea46eca --- /dev/null +++ b/src/silx/io/test/test_h5py_utils.py @@ -0,0 +1,451 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for h5py utilities""" + +__authors__ = ["W. de Nolf"] +__license__ = "MIT" +__date__ = "27/01/2020" + + +import unittest +import os +import sys +import time +import shutil +import logging +import tempfile +import multiprocessing +from contextlib import contextmanager + +from .. 
import h5py_utils
from ...utils.retry import RetryError, RetryTimeoutError

IS_WINDOWS = sys.platform == "win32"
logger = logging.getLogger()


def _subprocess_context_main(queue, contextmgr, *args, **kw):
    # Child-process entry point: enter *contextmgr*, signal the parent via
    # the queue, then block until the parent signals back to exit.
    try:
        with contextmgr(*args, **kw):
            queue.put(None)  # tell the parent the context is entered
            queue.get()      # wait for the parent's permission to exit
    except Exception:
        queue.put(None)      # unblock the parent even on failure
        raise


@contextmanager
def _subprocess_context(contextmgr, *args, **kw):
    # Run *contextmgr* in a subprocess and keep it entered for the duration
    # of this (parent-side) context. ``timeout`` (popped from kw, default 10)
    # bounds both the hand-shake wait and the final join.
    print("\nSTART", os.getpid())
    timeout = kw.pop("timeout", 10)
    queue = multiprocessing.Queue(maxsize=1)
    p = multiprocessing.Process(
        target=_subprocess_context_main, args=(queue, contextmgr) + args, kwargs=kw
    )
    p.start()
    try:
        # Wait until the child reports it has entered the context manager.
        queue.get(timeout=timeout)
        yield
    finally:
        # Allow the child to leave the context manager and wait for it.
        queue.put(None)
        p.join(timeout)
        print(" EXIT", os.getpid())


@contextmanager
def _open_context(filename, **kw):
    # Open an HDF5 file through h5py_utils.File. When opening in write mode
    # a "check" dataset is created; the tests read it back to verify the
    # file is uncorrupted.
    try:
        print(os.getpid(), "OPEN", filename, kw)
        with h5py_utils.File(filename, **kw) as f:
            if kw.get("mode") == "w":
                f["check"] = True
            f.flush()
            yield f
    except Exception:
        print(" ", os.getpid(), "FAILED", filename, kw)
        raise
    else:
        print(" ", os.getpid(), "CLOSED", filename, kw)


def _cause_segfault():
    # Intentionally crash the interpreter with an unbounded out-of-bounds
    # write through a ctypes pointer (simulates a hard crash of a retried
    # subprocess; never returns).
    import ctypes

    i = ctypes.c_char(b"a")
    j = ctypes.pointer(i)
    c = 0
    while True:
        j[c] = b"a"
        c += 1


def _top_level_names_test(txtfilename, *args, **kw):
    # Return the file's top-level HDF5 names, failing the first
    # ``ncausefailure`` calls. The fail counter is persisted in
    # *txtfilename* so it survives subprocess restarts: odd failures raise
    # RetryError, even failures crash the process outright.
    sys.stderr = open(os.devnull, "w")  # silence crash/retry noise

    with open(txtfilename, mode="r") as f:
        failcounter = int(f.readline().strip())

    ncausefailure = kw.pop("ncausefailure")
    faildelay = kw.pop("faildelay")
    if failcounter < ncausefailure:
        time.sleep(faildelay)
        failcounter += 1
        with open(txtfilename, mode="w") as f:
            f.write(str(failcounter))
        if failcounter % 2:
            raise RetryError
        else:
            _cause_segfault()
    return h5py_utils._top_level_names(*args, **kw)


# Wrapped variant: re-runs the function in a fresh subprocess on failure.
top_level_names_test = h5py_utils.retry_in_subprocess()(_top_level_names_test)


def subtests(test):
    # Decorator: run *test* once per option set yielded by self._subtests(),
    # each run inside unittest's subTest context.
    def wrapper(self):
        for subtest_options in self._subtests():
            print("\n====SUB TEST===\n")
            print(f"sub
test options: {subtest_options}") + with self.subTest(str(subtest_options)): + test(self) + + return wrapper + + +class TestH5pyUtils(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def _subtests(self): + self._subtest_options = {"mode": "w"} + self.filename_generator = self._filenames() + yield self._subtest_options + self._subtest_options = {"mode": "w", "libver": "latest"} + self.filename_generator = self._filenames() + yield + + def _filenames(self): + i = 1 + while True: + filename = os.path.join(self.test_dir, "file{}.h5".format(i)) + with self._open_context(filename): + pass + yield filename + i += 1 + + def _new_filename(self): + return next(self.filename_generator) + + @contextmanager + def _open_context(self, filename, **kwargs): + kw = dict(self._subtest_options) + kw.update(kwargs) + with _open_context(filename, **kw) as f: + yield f + + @contextmanager + def _open_context_subprocess(self, filename, **kwargs): + kw = dict(self._subtest_options) + kw.update(kwargs) + with _subprocess_context(_open_context, filename, **kw): + yield + + def _assert_hdf5_data(self, f): + self.assertTrue(f["check"][()]) + + def _validate_hdf5_data(self, filename, swmr=False): + with self._open_context(filename, mode="r") as f: + self.assertEqual(f.swmr_mode, swmr) + self._assert_hdf5_data(f) + + @subtests + def test_modes_single_process(self): + """Test concurrent access to the different files from the same process""" + # When using HDF5_USE_FILE_LOCKING, open files with and without + # locking should raise an exception. HDF5_USE_FILE_LOCKING should + # be reset when all files are closed. 

        orig = os.environ.get("HDF5_USE_FILE_LOCKING")
        filename1 = self._new_filename()
        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))
        filename2 = self._new_filename()
        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))

        # First file held by a NON-locking reader: opening a second file
        # with a different locking requirement must raise on h5py versions
        # without the per-file "locking" argument.
        with self._open_context(filename1, mode="r"):
            locking1 = False
            for mode in ["r", "w", "a"]:
                locking2 = mode != "r"  # only "r" opens without locking
                raise_condition = not h5py_utils.HAS_LOCKING_ARGUMENT
                raise_condition &= locking1 != locking2
                with self.assertRaisesIf(raise_condition, RuntimeError):
                    with self._open_context(filename2, mode=mode):
                        pass
        self._validate_hdf5_data(filename1)
        self._validate_hdf5_data(filename2)
        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))

        # Same scenario with the first file held by a LOCKING writer.
        with self._open_context(filename1, mode="a"):
            locking1 = True
            for mode in ["r", "w", "a"]:
                locking2 = mode != "r"
                raise_condition = not h5py_utils.HAS_LOCKING_ARGUMENT
                raise_condition &= locking1 != locking2
                with self.assertRaisesIf(raise_condition, RuntimeError):
                    with self._open_context(filename2, mode=mode):
                        pass
        self._validate_hdf5_data(filename1)
        self._validate_hdf5_data(filename2)
        self.assertEqual(orig, os.environ.get("HDF5_USE_FILE_LOCKING"))

    @property
    def _libver_low_bound_is_v108(self):
        # True when the current subtest's libver option keeps the v108
        # (HDF5 1.8) lower bound; delegates to h5py_utils.
        libver = self._subtest_options.get("libver")
        return h5py_utils._libver_low_bound_is_v108(libver)

    @property
    def _nonlocking_reader_before_writer(self):
        """A non-locking reader must open the file before it is locked by a writer"""
        if IS_WINDOWS and h5py_utils.HDF5_HAS_LOCKING_ARGUMENT:
            return True
        if not self._libver_low_bound_is_v108:
            return True
        return False

    @contextmanager
    def assertRaisesIf(self, condition, *args, **kw):
        # assertRaises when *condition* is true, otherwise a pass-through
        # context (lets one code path express both expectations).
        if condition:
            with self.assertRaises(*args, **kw):
                yield
        else:
            yield

    @unittest.skipIf(
        h5py_utils.HDF5_HAS_LOCKING_ARGUMENT != h5py_utils.H5PY_HAS_LOCKING_ARGUMENT,
        "Versions of libhdf5 and h5py use incompatible file locking behaviour",
    )
    @subtests
    def 
test_modes_multi_process(self): + """Test concurrent access to the same file from different processes""" + filename = self._new_filename() + + nonlocking_reader_before_writer = self._nonlocking_reader_before_writer + writer_before_nonlocking_reader_exception = OSError + old_hdf5_on_windows = IS_WINDOWS and not h5py_utils.HDF5_HAS_LOCKING_ARGUMENT + locked_exception = OSError + + # File locked by a writer + unexpected_access = old_hdf5_on_windows and self._libver_low_bound_is_v108 + for wmode in ["w", "a"]: + with self._open_context_subprocess(filename, mode=wmode): + # Access by a second non-locking reader + with self.assertRaisesIf( + nonlocking_reader_before_writer, + writer_before_nonlocking_reader_exception, + ): + with self._open_context(filename, mode="r") as f: + self._assert_hdf5_data(f) + # No access by a second locking reader + if unexpected_access: + logger.warning("unexpected concurrent access by a locking reader") + with self.assertRaisesIf(not unexpected_access, locked_exception): + with self._open_context(filename, mode="r", locking=True) as f: + self._assert_hdf5_data(f) + # No access by a second writer + if unexpected_access: + logger.warning("unexpected concurrent access by a writer") + with self.assertRaisesIf(not unexpected_access, locked_exception): + with self._open_context(filename, mode="a") as f: + self._assert_hdf5_data(f) + # Check for file corruption + if not nonlocking_reader_before_writer: + self._validate_hdf5_data(filename) + self._validate_hdf5_data(filename) + + # File locked by a reader + unexpected_access = old_hdf5_on_windows + with _subprocess_context(_open_context, filename, mode="r", locking=True): + # Access by a non-locking reader + with self._open_context(filename, mode="r") as f: + self._assert_hdf5_data(f) + # Access by a locking reader + with self._open_context(filename, mode="r", locking=True) as f: + self._assert_hdf5_data(f) + # No access by a second writer + if unexpected_access: + logger.warning("unexpected 
concurrent access by a writer") + raise_condition = not unexpected_access + with self.assertRaisesIf(raise_condition, locked_exception): + with self._open_context(filename, mode="a") as f: + self._assert_hdf5_data(f) + # Check for file corruption + self._validate_hdf5_data(filename) + self._validate_hdf5_data(filename) + + # File open by a non-locking reader + with self._open_context_subprocess(filename, mode="r"): + # Access by a second non-locking reader + with self._open_context(filename, mode="r") as f: + self._assert_hdf5_data(f) + # Access by a second locking reader + with self._open_context(filename, mode="r", locking=True) as f: + self._assert_hdf5_data(f) + # Access by a second writer + with self._open_context(filename, mode="a") as f: + self._assert_hdf5_data(f) + # Check for file corruption + self._validate_hdf5_data(filename) + self._validate_hdf5_data(filename) + + @subtests + @unittest.skipIf(not h5py_utils.HAS_SWMR, "SWMR not supported") + def test_modes_multi_process_swmr(self): + filename = self._new_filename() + + with self._open_context(filename, mode="w", libver="latest") as f: + pass + + # File open by SWMR writer + with self._open_context_subprocess(filename, mode="a", swmr=True): + with self._open_context(filename, mode="r") as f: + assert f.swmr_mode + self._assert_hdf5_data(f) + with self.assertRaises(OSError): + with self._open_context(filename, mode="a") as f: + pass + self._validate_hdf5_data(filename, swmr=True) + + @subtests + def test_retry_defaults(self): + filename = self._new_filename() + + names = h5py_utils.top_level_names(filename) + self.assertEqual(names, []) + + names = h5py_utils.safe_top_level_names(filename) + self.assertEqual(names, []) + + names = h5py_utils.top_level_names(filename, include_only=None) + self.assertEqual(names, ["check"]) + + names = h5py_utils.safe_top_level_names(filename, include_only=None) + self.assertEqual(names, ["check"]) + + with h5py_utils.open_item(filename, "/check", validate=lambda x: False) 
as item:
            self.assertEqual(item, None)

        with h5py_utils.open_item(filename, "/check", validate=None) as item:
            self.assertTrue(item[()])

        # An always-failing validator must exhaust the retry budget.
        with self.assertRaises(RetryTimeoutError):
            with h5py_utils.open_item(
                filename,
                "/check",
                retry_timeout=0.1,
                retry_invalid=True,
                validate=lambda x: False,
            ) as item:
                pass

        ncall = 0

        def validate(item):
            # Fail once with RetryError, then accept: checks that
            # retry_invalid re-runs the validation callback.
            nonlocal ncall
            if ncall >= 1:
                return True
            else:
                ncall += 1
                raise RetryError

        with h5py_utils.open_item(
            filename,
            "/check",
            validate=validate,
            retry_timeout=1,
            retry_invalid=True,
        ) as item:
            self.assertTrue(item[()])

    @subtests
    def test_retry_custom(self):
        # A user-defined retry context manager must honour retry_timeout.
        filename = self._new_filename()
        ncausefailure = 3
        faildelay = 0.1
        # Budgets that are clearly above / below what ncausefailure
        # delayed failures require.
        sufficient_timeout = ncausefailure * (faildelay + 10)
        insufficient_timeout = ncausefailure * faildelay * 0.5

        @h5py_utils.retry_contextmanager()
        def open_item(filename, name):
            # Fails the first ncausefailure calls, then yields the item.
            nonlocal failcounter
            if failcounter < ncausefailure:
                time.sleep(faildelay)
                failcounter += 1
                raise RetryError
            with h5py_utils.File(filename) as h5file:
                yield h5file[name]

        failcounter = 0
        kw = {"retry_timeout": sufficient_timeout}
        with open_item(filename, "/check", **kw) as item:
            self.assertTrue(item[()])

        failcounter = 0
        kw = {"retry_timeout": insufficient_timeout}
        with self.assertRaises(RetryTimeoutError):
            with open_item(filename, "/check", **kw) as item:
                pass

    @subtests
    def test_retry_in_subprocess(self):
        # Retryable failures (including hard crashes) handled by re-running
        # the call in a fresh subprocess; see _top_level_names_test.
        filename = self._new_filename()
        txtfilename = os.path.join(self.test_dir, "failcounter.txt")
        ncausefailure = 3
        faildelay = 0.1
        sufficient_timeout = ncausefailure * (faildelay + 10)
        insufficient_timeout = ncausefailure * faildelay * 0.5

        kw = {
            "retry_timeout": sufficient_timeout,
            "include_only": None,
            "ncausefailure": ncausefailure,
            "faildelay": faildelay,
        }
        with open(txtfilename, mode="w") as f:
            f.write("0")
        names = top_level_names_test(txtfilename, filename, **kw)
        self.assertEqual(names, 
["check"]) + + kw = { + "retry_timeout": insufficient_timeout, + "include_only": None, + "ncausefailure": ncausefailure, + "faildelay": faildelay, + } + with open(txtfilename, mode="w") as f: + f.write("0") + with self.assertRaises(RetryTimeoutError): + top_level_names_test(txtfilename, filename, **kw) diff --git a/src/silx/io/test/test_nxdata.py b/src/silx/io/test/test_nxdata.py new file mode 100644 index 0000000..9025d6d --- /dev/null +++ b/src/silx/io/test/test_nxdata.py @@ -0,0 +1,563 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for NXdata parsing""" + +__authors__ = ["P. 
Knobel"] +__license__ = "MIT" +__date__ = "24/03/2020" + + +import tempfile +import unittest +import h5py +import numpy + +from .. import nxdata + + +text_dtype = h5py.special_dtype(vlen=str) + + +class TestNXdata(unittest.TestCase): + def setUp(self): + tmp = tempfile.NamedTemporaryFile(prefix="nxdata_examples_", suffix=".h5", delete=True) + tmp.file.close() + self.h5fname = tmp.name + self.h5f = h5py.File(tmp.name, "w") + + # SCALARS + g0d = self.h5f.create_group("scalars") + + g0d0 = g0d.create_group("0D_scalar") + g0d0.attrs["NX_class"] = "NXdata" + g0d0.attrs["signal"] = "scalar" + g0d0.create_dataset("scalar", data=10) + g0d0.create_dataset("scalar_errors", data=0.1) + + g0d1 = g0d.create_group("2D_scalars") + g0d1.attrs["NX_class"] = "NXdata" + g0d1.attrs["signal"] = "scalars" + ds = g0d1.create_dataset("scalars", data=numpy.arange(3 * 10).reshape((3, 10))) + ds.attrs["interpretation"] = "scalar" + + g0d1 = g0d.create_group("4D_scalars") + g0d1.attrs["NX_class"] = "NXdata" + g0d1.attrs["signal"] = "scalars" + ds = g0d1.create_dataset("scalars", data=numpy.arange(2 * 2 * 3 * 10).reshape((2, 2, 3, 10))) + ds.attrs["interpretation"] = "scalar" + + # SPECTRA + g1d = self.h5f.create_group("spectra") + + g1d0 = g1d.create_group("1D_spectrum") + g1d0.attrs["NX_class"] = "NXdata" + g1d0.attrs["signal"] = "count" + g1d0.attrs["auxiliary_signals"] = numpy.array(["count2", "count3"], + dtype=text_dtype) + g1d0.attrs["axes"] = "energy_calib" + g1d0.attrs["uncertainties"] = numpy.array(["energy_errors", ], + dtype=text_dtype) + g1d0.create_dataset("count", data=numpy.arange(10)) + g1d0.create_dataset("count2", data=0.5 * numpy.arange(10)) + d = g1d0.create_dataset("count3", data=0.4 * numpy.arange(10)) + d.attrs["long_name"] = "3rd counter" + g1d0.create_dataset("title", data="Title as dataset (like nexpy)") + g1d0.create_dataset("energy_calib", data=(10, 5)) # 10 * idx + 5 + g1d0.create_dataset("energy_errors", data=3.14 * numpy.random.rand(10)) + + g1d1 = 
g1d.create_group("2D_spectra") + g1d1.attrs["NX_class"] = "NXdata" + g1d1.attrs["signal"] = "counts" + ds = g1d1.create_dataset("counts", data=numpy.arange(3 * 10).reshape((3, 10))) + ds.attrs["interpretation"] = "spectrum" + + g1d2 = g1d.create_group("4D_spectra") + g1d2.attrs["NX_class"] = "NXdata" + g1d2.attrs["signal"] = "counts" + g1d2.attrs["axes"] = numpy.array(["energy", ], dtype=text_dtype) + ds = g1d2.create_dataset("counts", data=numpy.arange(2 * 2 * 3 * 10).reshape((2, 2, 3, 10))) + ds.attrs["interpretation"] = "spectrum" + ds = g1d2.create_dataset("errors", data=4.5 * numpy.random.rand(2, 2, 3, 10)) + ds = g1d2.create_dataset("energy", data=5 + 10 * numpy.arange(15), + shuffle=True, compression="gzip") + ds.attrs["long_name"] = "Calibrated energy" + ds.attrs["first_good"] = 3 + ds.attrs["last_good"] = 12 + g1d2.create_dataset("energy_errors", data=10 * numpy.random.rand(15)) + + # IMAGES + g2d = self.h5f.create_group("images") + + g2d0 = g2d.create_group("2D_regular_image") + g2d0.attrs["NX_class"] = "NXdata" + g2d0.attrs["signal"] = "image" + g2d0.attrs["auxiliary_signals"] = "image2" + g2d0.attrs["axes"] = numpy.array(["rows_calib", "columns_coordinates"], + dtype=text_dtype) + g2d0.create_dataset("image", data=numpy.arange(4 * 6).reshape((4, 6))) + g2d0.create_dataset("image2", data=numpy.arange(4 * 6).reshape((4, 6))) + ds = g2d0.create_dataset("rows_calib", data=(10, 5)) + ds.attrs["long_name"] = "Calibrated Y" + g2d0.create_dataset("columns_coordinates", data=0.5 + 0.02 * numpy.arange(6)) + + g2d1 = g2d.create_group("2D_irregular_data") + g2d1.attrs["NX_class"] = "NXdata" + g2d1.attrs["signal"] = "data" + g2d1.attrs["title"] = "Title as group attr" + g2d1.attrs["axes"] = numpy.array(["rows_coordinates", "columns_coordinates"], + dtype=text_dtype) + g2d1.create_dataset("data", data=numpy.arange(64 * 128).reshape((64, 128))) + g2d1.create_dataset("rows_coordinates", data=numpy.arange(64) + numpy.random.rand(64)) + 
g2d1.create_dataset("columns_coordinates", data=numpy.arange(128) + 2.5 * numpy.random.rand(128)) + + g2d2 = g2d.create_group("3D_images") + g2d2.attrs["NX_class"] = "NXdata" + g2d2.attrs["signal"] = "images" + ds = g2d2.create_dataset("images", data=numpy.arange(2 * 4 * 6).reshape((2, 4, 6))) + ds.attrs["interpretation"] = "image" + + g2d3 = g2d.create_group("5D_images") + g2d3.attrs["NX_class"] = "NXdata" + g2d3.attrs["signal"] = "images" + g2d3.attrs["axes"] = numpy.array(["rows_coordinates", "columns_coordinates"], + dtype=text_dtype) + ds = g2d3.create_dataset("images", data=numpy.arange(2 * 2 * 2 * 4 * 6).reshape((2, 2, 2, 4, 6))) + ds.attrs["interpretation"] = "image" + g2d3.create_dataset("rows_coordinates", data=5 + 10 * numpy.arange(4)) + g2d3.create_dataset("columns_coordinates", data=0.5 + 0.02 * numpy.arange(6)) + + g2d4 = g2d.create_group("RGBA_image") + g2d4.attrs["NX_class"] = "NXdata" + g2d4.attrs["signal"] = "image" + g2d4.attrs["axes"] = numpy.array(["rows_calib", "columns_coordinates"], + dtype=text_dtype) + rgba_image = numpy.linspace(0, 1, num=7*8*3).reshape((7, 8, 3)) + rgba_image[:, :, 1] = 1 - rgba_image[:, :, 1] # invert G channel to add some color + ds = g2d4.create_dataset("image", data=rgba_image) + ds.attrs["interpretation"] = "rgba-image" + ds = g2d4.create_dataset("rows_calib", data=(10, 5)) + ds.attrs["long_name"] = "Calibrated Y" + g2d4.create_dataset("columns_coordinates", data=0.5+0.02*numpy.arange(8)) + + # SCATTER + g = self.h5f.create_group("scatters") + + gd0 = g.create_group("x_y_scatter") + gd0.attrs["NX_class"] = "NXdata" + gd0.attrs["signal"] = "y" + gd0.attrs["axes"] = numpy.array(["x", ], dtype=text_dtype) + gd0.create_dataset("y", data=numpy.random.rand(128) - 0.5) + gd0.create_dataset("x", data=2 * numpy.random.rand(128)) + gd0.create_dataset("x_errors", data=0.05 * numpy.random.rand(128)) + gd0.create_dataset("errors", data=0.05 * numpy.random.rand(128)) + + gd1 = g.create_group("x_y_value_scatter") + 
gd1.attrs["NX_class"] = "NXdata" + gd1.attrs["signal"] = "values" + gd1.attrs["axes"] = numpy.array(["x", "y"], dtype=text_dtype) + gd1.create_dataset("values", data=3.14 * numpy.random.rand(128)) + gd1.create_dataset("y", data=numpy.random.rand(128)) + gd1.create_dataset("y_errors", data=0.02 * numpy.random.rand(128)) + gd1.create_dataset("x", data=numpy.random.rand(128)) + gd1.create_dataset("x_errors", data=0.02 * numpy.random.rand(128)) + + def tearDown(self): + self.h5f.close() + + def testValidity(self): + for group in self.h5f: + for subgroup in self.h5f[group]: + self.assertTrue( + nxdata.is_valid_nxdata(self.h5f[group][subgroup]), + "%s/%s not found to be a valid NXdata group" % (group, subgroup)) + + def testScalars(self): + nxd = nxdata.NXdata(self.h5f["scalars/0D_scalar"]) + self.assertTrue(nxd.signal_is_0d) + self.assertEqual(nxd.signal[()], 10) + self.assertEqual(nxd.axes_names, []) + self.assertEqual(nxd.axes_dataset_names, []) + self.assertEqual(nxd.axes, []) + self.assertIsNotNone(nxd.errors) + self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + self.assertIsNone(nxd.interpretation) + + nxd = nxdata.NXdata(self.h5f["scalars/2D_scalars"]) + self.assertTrue(nxd.signal_is_2d) + self.assertEqual(nxd.signal[1, 2], 12) + self.assertEqual(nxd.axes_names, [None, None]) + self.assertEqual(nxd.axes_dataset_names, [None, None]) + self.assertEqual(nxd.axes, [None, None]) + self.assertIsNone(nxd.errors) + self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + self.assertEqual(nxd.interpretation, "scalar") + + nxd = nxdata.NXdata(self.h5f["scalars/4D_scalars"]) + self.assertFalse(nxd.signal_is_0d or nxd.signal_is_1d or + nxd.signal_is_2d or nxd.signal_is_3d) + self.assertEqual(nxd.signal[1, 0, 1, 4], 74) + self.assertEqual(nxd.axes_names, [None, None, None, None]) + self.assertEqual(nxd.axes_dataset_names, [None, None, None, None]) + self.assertEqual(nxd.axes, [None, None, None, None]) + self.assertIsNone(nxd.errors) + 
self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + self.assertEqual(nxd.interpretation, "scalar") + + def testSpectra(self): + nxd = nxdata.NXdata(self.h5f["spectra/1D_spectrum"]) + self.assertTrue(nxd.signal_is_1d) + self.assertTrue(nxd.is_curve) + self.assertTrue(numpy.array_equal(numpy.array(nxd.signal), + numpy.arange(10))) + self.assertEqual(nxd.axes_names, ["energy_calib"]) + self.assertEqual(nxd.axes_dataset_names, ["energy_calib"]) + self.assertEqual(nxd.axes[0][0], 10) + self.assertEqual(nxd.axes[0][1], 5) + self.assertIsNone(nxd.errors) + self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + self.assertIsNone(nxd.interpretation) + self.assertEqual(nxd.title, "Title as dataset (like nexpy)") + + self.assertEqual(nxd.auxiliary_signals_dataset_names, + ["count2", "count3"]) + self.assertEqual(nxd.auxiliary_signals_names, + ["count2", "3rd counter"]) + self.assertAlmostEqual(nxd.auxiliary_signals[1][2], + 0.8) # numpy.arange(10) * 0.4 + + nxd = nxdata.NXdata(self.h5f["spectra/2D_spectra"]) + self.assertTrue(nxd.signal_is_2d) + self.assertTrue(nxd.is_curve) + self.assertEqual(nxd.axes_names, [None, None]) + self.assertEqual(nxd.axes_dataset_names, [None, None]) + self.assertEqual(nxd.axes, [None, None]) + self.assertIsNone(nxd.errors) + self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + self.assertEqual(nxd.interpretation, "spectrum") + + nxd = nxdata.NXdata(self.h5f["spectra/4D_spectra"]) + self.assertFalse(nxd.signal_is_0d or nxd.signal_is_1d or + nxd.signal_is_2d or nxd.signal_is_3d) + self.assertTrue(nxd.is_curve) + self.assertEqual(nxd.axes_names, + [None, None, None, "Calibrated energy"]) + self.assertEqual(nxd.axes_dataset_names, + [None, None, None, "energy"]) + self.assertEqual(nxd.axes[:3], [None, None, None]) + self.assertEqual(nxd.axes[3].shape, (10, )) # dataset shape (15, ) sliced [3:12] + self.assertIsNotNone(nxd.errors) + self.assertEqual(nxd.errors.shape, (2, 2, 3, 10)) + self.assertFalse(nxd.is_scatter or 
nxd.is_x_y_value_scatter) + self.assertEqual(nxd.interpretation, "spectrum") + self.assertEqual(nxd.get_axis_errors("energy").shape, + (10,)) + # test getting axis errors by long_name + self.assertTrue(numpy.array_equal(nxd.get_axis_errors("Calibrated energy"), + nxd.get_axis_errors("energy"))) + self.assertTrue(numpy.array_equal(nxd.get_axis_errors(b"Calibrated energy"), + nxd.get_axis_errors("energy"))) + + def testImages(self): + nxd = nxdata.NXdata(self.h5f["images/2D_regular_image"]) + self.assertTrue(nxd.signal_is_2d) + self.assertTrue(nxd.is_image) + self.assertEqual(nxd.axes_names, ["Calibrated Y", "columns_coordinates"]) + self.assertEqual(list(nxd.axes_dataset_names), + ["rows_calib", "columns_coordinates"]) + self.assertIsNone(nxd.errors) + self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + self.assertIsNone(nxd.interpretation) + self.assertEqual(len(nxd.auxiliary_signals), 1) + self.assertEqual(nxd.auxiliary_signals_names, ["image2"]) + + nxd = nxdata.NXdata(self.h5f["images/2D_irregular_data"]) + self.assertTrue(nxd.signal_is_2d) + self.assertTrue(nxd.is_image) + + self.assertEqual(nxd.axes_dataset_names, nxd.axes_names) + self.assertEqual(list(nxd.axes_dataset_names), + ["rows_coordinates", "columns_coordinates"]) + self.assertEqual(len(nxd.axes), 2) + self.assertIsNone(nxd.errors) + self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + self.assertIsNone(nxd.interpretation) + self.assertEqual(nxd.title, "Title as group attr") + + nxd = nxdata.NXdata(self.h5f["images/5D_images"]) + self.assertTrue(nxd.is_image) + self.assertFalse(nxd.signal_is_0d or nxd.signal_is_1d or + nxd.signal_is_2d or nxd.signal_is_3d) + self.assertEqual(nxd.axes_names, + [None, None, None, 'rows_coordinates', 'columns_coordinates']) + self.assertEqual(nxd.axes_dataset_names, + [None, None, None, 'rows_coordinates', 'columns_coordinates']) + self.assertIsNone(nxd.errors) + self.assertFalse(nxd.is_scatter or nxd.is_x_y_value_scatter) + 
self.assertEqual(nxd.interpretation, "image") + + nxd = nxdata.NXdata(self.h5f["images/RGBA_image"]) + self.assertTrue(nxd.is_image) + self.assertEqual(nxd.interpretation, "rgba-image") + self.assertTrue(nxd.signal_is_3d) + self.assertEqual(nxd.axes_names, ["Calibrated Y", + "columns_coordinates", + None]) + self.assertEqual(list(nxd.axes_dataset_names), + ["rows_calib", "columns_coordinates", None]) + + def testScatters(self): + nxd = nxdata.NXdata(self.h5f["scatters/x_y_scatter"]) + self.assertTrue(nxd.signal_is_1d) + self.assertEqual(nxd.axes_names, ["x"]) + self.assertEqual(nxd.axes_dataset_names, + ["x"]) + self.assertIsNotNone(nxd.errors) + self.assertEqual(nxd.get_axis_errors("x").shape, + (128, )) + self.assertTrue(nxd.is_scatter) + self.assertFalse(nxd.is_x_y_value_scatter) + self.assertIsNone(nxd.interpretation) + + nxd = nxdata.NXdata(self.h5f["scatters/x_y_value_scatter"]) + self.assertFalse(nxd.signal_is_1d) + self.assertTrue(nxd.axes_dataset_names, + nxd.axes_names) + self.assertEqual(nxd.axes_dataset_names, + ["x", "y"]) + self.assertEqual(nxd.get_axis_errors("x").shape, + (128, )) + self.assertEqual(nxd.get_axis_errors("y").shape, + (128, )) + self.assertEqual(len(nxd.axes), 2) + self.assertIsNone(nxd.errors) + self.assertTrue(nxd.is_scatter) + self.assertTrue(nxd.is_x_y_value_scatter) + self.assertIsNone(nxd.interpretation) + + +class TestLegacyNXdata(unittest.TestCase): + def setUp(self): + tmp = tempfile.NamedTemporaryFile(prefix="nxdata_legacy_examples_", + suffix=".h5", delete=True) + tmp.file.close() + self.h5fname = tmp.name + self.h5f = h5py.File(tmp.name, "w") + + def tearDown(self): + self.h5f.close() + + def testSignalAttrOnDataset(self): + g = self.h5f.create_group("2D") + g.attrs["NX_class"] = "NXdata" + + ds0 = g.create_dataset("image0", + data=numpy.arange(4 * 6).reshape((4, 6))) + ds0.attrs["signal"] = 1 + ds0.attrs["long_name"] = "My first image" + + ds1 = g.create_dataset("image1", + data=numpy.arange(4 * 6).reshape((4, 6))) + 
ds1.attrs["signal"] = "2" + ds1.attrs["long_name"] = "My 2nd image" + + ds2 = g.create_dataset("image2", + data=numpy.arange(4 * 6).reshape((4, 6))) + ds2.attrs["signal"] = 3 + + nxd = nxdata.NXdata(self.h5f["2D"]) + + self.assertEqual(nxd.signal_dataset_name, "image0") + self.assertEqual(nxd.signal_name, "My first image") + self.assertEqual(nxd.signal.shape, + (4, 6)) + + self.assertEqual(len(nxd.auxiliary_signals), 2) + self.assertEqual(nxd.auxiliary_signals[1].shape, + (4, 6)) + + self.assertEqual(nxd.auxiliary_signals_dataset_names, + ["image1", "image2"]) + self.assertEqual(nxd.auxiliary_signals_names, + ["My 2nd image", "image2"]) + + def testAxesOnSignalDataset(self): + g = self.h5f.create_group("2D") + g.attrs["NX_class"] = "NXdata" + + ds0 = g.create_dataset("image0", + data=numpy.arange(4 * 6).reshape((4, 6))) + ds0.attrs["signal"] = 1 + ds0.attrs["axes"] = "yaxis:xaxis" + + ds1 = g.create_dataset("yaxis", + data=numpy.arange(4)) + ds2 = g.create_dataset("xaxis", + data=numpy.arange(6)) + + nxd = nxdata.NXdata(self.h5f["2D"]) + + self.assertEqual(nxd.axes_dataset_names, + ["yaxis", "xaxis"]) + self.assertTrue(numpy.array_equal(nxd.axes[0], + numpy.arange(4))) + self.assertTrue(numpy.array_equal(nxd.axes[1], + numpy.arange(6))) + + def testAxesOnAxesDatasets(self): + g = self.h5f.create_group("2D") + g.attrs["NX_class"] = "NXdata" + + ds0 = g.create_dataset("image0", + data=numpy.arange(4 * 6).reshape((4, 6))) + ds0.attrs["signal"] = 1 + ds1 = g.create_dataset("yaxis", + data=numpy.arange(4)) + ds1.attrs["axis"] = 0 + ds2 = g.create_dataset("xaxis", + data=numpy.arange(6)) + ds2.attrs["axis"] = "1" + + nxd = nxdata.NXdata(self.h5f["2D"]) + self.assertEqual(nxd.axes_dataset_names, + ["yaxis", "xaxis"]) + self.assertTrue(numpy.array_equal(nxd.axes[0], + numpy.arange(4))) + self.assertTrue(numpy.array_equal(nxd.axes[1], + numpy.arange(6))) + + def testAsciiUndefinedAxesAttrs(self): + """Some files may not be using utf8 for str attrs""" + g = 
self.h5f.create_group("bytes_attrs")
        g.attrs["NX_class"] = b"NXdata"
        g.attrs["signal"] = b"image0"
        g.attrs["axes"] = b"yaxis", b"."

        g.create_dataset("image0",
                         data=numpy.arange(4 * 6).reshape((4, 6)))
        g.create_dataset("yaxis",
                         data=numpy.arange(4))

        nxd = nxdata.NXdata(self.h5f["bytes_attrs"])
        # The "." placeholder axis must be reported as None.
        self.assertEqual(nxd.axes_dataset_names,
                         ["yaxis", None])


class TestSaveNXdata(unittest.TestCase):
    """Round-trip tests for nxdata.save_NXdata through a temporary HDF5 file."""

    def setUp(self):
        # Only a unique temp file *name* is needed; the file itself is
        # created by save_NXdata.
        tmp = tempfile.NamedTemporaryFile(prefix="nxdata",
                                          suffix=".h5", delete=True)
        tmp.file.close()
        self.h5fname = tmp.name

    def testSimpleSave(self):
        # Save with explicit entry/data group and axis names, then read back.
        # NOTE(review): the h5py.File handles below are only closed on the
        # success path (no context manager) — a failed assertion leaks them.
        sig = numpy.array([0, 1, 2])
        a0 = numpy.array([2, 3, 4])
        a1 = numpy.array([3, 4, 5])
        nxdata.save_NXdata(filename=self.h5fname,
                           signal=sig,
                           axes=[a0, a1],
                           signal_name="sig",
                           axes_names=["a0", "a1"],
                           nxentry_name="a",
                           nxdata_name="mydata")

        h5f = h5py.File(self.h5fname, "r")
        self.assertTrue(nxdata.is_valid_nxdata(h5f["a/mydata"]))

        nxd = nxdata.NXdata(h5f["/a/mydata"])
        self.assertTrue(numpy.array_equal(nxd.signal,
                                          sig))
        self.assertTrue(numpy.array_equal(nxd.axes[0],
                                          a0))

        h5f.close()

    def testSimplestSave(self):
        # Only the signal given: default entry "/entry" and group "data0".
        sig = numpy.array([0, 1, 2])
        nxdata.save_NXdata(filename=self.h5fname,
                           signal=sig)

        h5f = h5py.File(self.h5fname, "r")

        self.assertTrue(nxdata.is_valid_nxdata(h5f["/entry/data0"]))

        nxd = nxdata.NXdata(h5f["/entry/data0"])
        self.assertTrue(numpy.array_equal(nxd.signal,
                                          sig))
        h5f.close()

    def testSaveDefaultAxesNames(self):
        # axes_names=None: datasets get default names dim0/dim1 while the
        # long names come from axes_long_names.
        sig = numpy.array([0, 1, 2])
        a0 = numpy.array([2, 3, 4])
        a1 = numpy.array([3, 4, 5])
        nxdata.save_NXdata(filename=self.h5fname,
                           signal=sig,
                           axes=[a0, a1],
                           signal_name="sig",
                           axes_names=None,
                           axes_long_names=["a", "b"],
                           nxentry_name="a",
                           nxdata_name="mydata")

        h5f = h5py.File(self.h5fname, "r")
        self.assertTrue(nxdata.is_valid_nxdata(h5f["a/mydata"]))

        nxd = nxdata.NXdata(h5f["/a/mydata"])
        self.assertTrue(numpy.array_equal(nxd.signal,
                                          sig))
        self.assertTrue(numpy.array_equal(nxd.axes[0],
                                          a0))
        self.assertEqual(nxd.axes_dataset_names,
                         [u"dim0", u"dim1"])
        self.assertEqual(nxd.axes_names,
                         [u"a", u"b"])

        h5f.close()

    def testSaveToExistingEntry(self):
        # Saving into a pre-existing NXentry group must reuse it.
        h5f = h5py.File(self.h5fname, "w")
        g = h5f.create_group("myentry")
        g.attrs["NX_class"] = "NXentry"
        h5f.close()

        sig = numpy.array([0, 1, 2])
        a0 = numpy.array([2, 3, 4])
        a1 = numpy.array([3, 4, 5])
        nxdata.save_NXdata(filename=self.h5fname,
                           signal=sig,
                           axes=[a0, a1],
                           signal_name="sig",
                           axes_names=["a0", "a1"],
                           nxentry_name="myentry",
                           nxdata_name="toto")

        h5f = h5py.File(self.h5fname, "r")
        self.assertTrue(nxdata.is_valid_nxdata(h5f["myentry/toto"]))

        nxd = nxdata.NXdata(h5f["myentry/toto"])
        self.assertTrue(numpy.array_equal(nxd.signal,
                                          sig))
        self.assertTrue(numpy.array_equal(nxd.axes[0],
                                          a0))
        h5f.close()
diff --git a/src/silx/io/test/test_octaveh5.py b/src/silx/io/test/test_octaveh5.py
new file mode 100644
index 0000000..1c3b3e0
--- /dev/null
+++ b/src/silx/io/test/test_octaveh5.py
@@ -0,0 +1,156 @@
# coding: utf-8
# /*##########################################################################
# Copyright (C) 2016 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +""" +Tests for the octaveh5 module +""" + +__authors__ = ["C. Nemoz", "H. Payno"] +__license__ = "MIT" +__date__ = "12/07/2016" + +import unittest +import os +import tempfile + +try: + from ..octaveh5 import Octaveh5 +except ImportError: + Octaveh5 = None + + +@unittest.skipIf(Octaveh5 is None, "Could not import h5py") +class TestOctaveH5(unittest.TestCase): + @staticmethod + def _get_struct_FT(): + return { + 'NO_CHECK': 0.0, 'SHOWSLICE': 1.0, 'DOTOMO': 1.0, 'DATABASE': 0.0, 'ANGLE_OFFSET': 0.0, + 'VOLSELECTION_REMEMBER': 0.0, 'NUM_PART': 4.0, 'VOLOUTFILE': 0.0, 'RINGSCORRECTION': 0.0, + 'DO_TEST_SLICE': 1.0, 'ZEROOFFMASK': 1.0, 'VERSION': 'fastomo3 version 2.0', + 'CORRECT_SPIKES_THRESHOLD': 0.040000000000000001, 'SHOWPROJ': 0.0, 'HALF_ACQ': 0.0, + 'ANGLE_OFFSET_VALUE': 0.0, 'FIXEDSLICE': 'middle', 'VOLSELECT': 'total' } + @staticmethod + def _get_struct_PYHSTEXE(): + return { + 'EXE': 'PyHST2_2015d', 'VERBOSE': 0.0, 'OFFV': 'PyHST2_2015d', 'TOMO': 0.0, + 'VERBOSE_FILE': 'pyhst_out.txt', 'DIR': '/usr/bin/', 'OFFN': 'pyhst2'} + + @staticmethod + def _get_struct_FTAXIS(): + return { + 'POSITION_VALUE': 12345.0, 'COR_ERROR': 0.0, 'FILESDURINGSCAN': 0.0, 'PLOTFIGURE': 1.0, + 'DIM1': 0.0, 'OVERSAMPLING': 5.0, 'TO_THE_CENTER': 1.0, 'POSITION': 'fixed', + 'COR_POSITION': 0.0, 'HA': 0.0 } + + @staticmethod + def _get_struct_PAGANIN(): + return { + 'MKEEP_MASK': 0.0, 'UNSHARP_SIGMA': 0.80000000000000004, 
'DILATE': 2.0, 'UNSHARP_COEFF': 3.0, + 'MEDIANR': 4.0, 'DB': 500.0, 'MKEEP_ABS': 0.0, 'MODE': 0.0, 'THRESHOLD': 0.5, + 'MKEEP_BONE': 0.0, 'DB2': 100.0, 'MKEEP_CORR': 0.0, 'MKEEP_SOFT': 0.0 } + + @staticmethod + def _get_struct_BEAMGEO(): + return {'DIST': 55.0, 'SY': 0.0, 'SX': 0.0, 'TYPE': 'p'} + + + def setUp(self): + self.tempdir = tempfile.mkdtemp() + self.test_3_6_fname = os.path.join(self.tempdir, "silx_tmp_t00_octaveTest_3_6.h5") + self.test_3_8_fname = os.path.join(self.tempdir, "silx_tmp_t00_octaveTest_3_8.h5") + + def tearDown(self): + if os.path.isfile(self.test_3_6_fname): + os.unlink(self.test_3_6_fname) + if os.path.isfile(self.test_3_8_fname): + os.unlink(self.test_3_8_fname) + + def testWritedIsReaded(self): + """ + Simple test to write and read back a structure compatible with the octave h5 format. + This test is for octave version > 3.8 + """ + writer = Octaveh5() + + writer.open(self.test_3_8_fname, 'a') + # step 1 writing the file + writer.write('FT', self._get_struct_FT()) + writer.write('PYHSTEXE', self._get_struct_PYHSTEXE()) + writer.write('FTAXIS', self._get_struct_FTAXIS()) + writer.write('PAGANIN', self._get_struct_PAGANIN()) + writer.write('BEAMGEO', self._get_struct_BEAMGEO()) + writer.close() + + # step 2 reading the file + reader = Octaveh5().open(self.test_3_8_fname) + # 2.1 check FT + data_readed = reader.get('FT') + self.assertEqual(data_readed, self._get_struct_FT() ) + # 2.2 check PYHSTEXE + data_readed = reader.get('PYHSTEXE') + self.assertEqual(data_readed, self._get_struct_PYHSTEXE() ) + # 2.3 check FTAXIS + data_readed = reader.get('FTAXIS') + self.assertEqual(data_readed, self._get_struct_FTAXIS() ) + # 2.4 check PAGANIN + data_readed = reader.get('PAGANIN') + self.assertEqual(data_readed, self._get_struct_PAGANIN() ) + # 2.5 check BEAMGEO + data_readed = reader.get('BEAMGEO') + self.assertEqual(data_readed, self._get_struct_BEAMGEO() ) + reader.close() + + def testWritedIsReadedOldOctaveVersion(self): + 
"""The same test as testWritedIsReaded but for octave version < 3.8 + """ + # test for octave version < 3.8 + writer = Octaveh5(3.6) + + writer.open(self.test_3_6_fname, 'a') + + # step 1 writing the file + writer.write('FT', self._get_struct_FT()) + writer.write('PYHSTEXE', self._get_struct_PYHSTEXE()) + writer.write('FTAXIS', self._get_struct_FTAXIS()) + writer.write('PAGANIN', self._get_struct_PAGANIN()) + writer.write('BEAMGEO', self._get_struct_BEAMGEO()) + writer.close() + + # step 2 reading the file + reader = Octaveh5(3.6).open(self.test_3_6_fname) + # 2.1 check FT + data_readed = reader.get('FT') + self.assertEqual(data_readed, self._get_struct_FT() ) + # 2.2 check PYHSTEXE + data_readed = reader.get('PYHSTEXE') + self.assertEqual(data_readed, self._get_struct_PYHSTEXE() ) + # 2.3 check FTAXIS + data_readed = reader.get('FTAXIS') + self.assertEqual(data_readed, self._get_struct_FTAXIS() ) + # 2.4 check PAGANIN + data_readed = reader.get('PAGANIN') + self.assertEqual(data_readed, self._get_struct_PAGANIN() ) + # 2.5 check BEAMGEO + data_readed = reader.get('BEAMGEO') + self.assertEqual(data_readed, self._get_struct_BEAMGEO() ) + reader.close() diff --git a/src/silx/io/test/test_rawh5.py b/src/silx/io/test/test_rawh5.py new file mode 100644 index 0000000..236484d --- /dev/null +++ b/src/silx/io/test/test_rawh5.py @@ -0,0 +1,85 @@ +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above 
copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Test for silx.io.rawh5 module""" + +__authors__ = ["V. Valls"] +__license__ = "MIT" +__date__ = "21/09/2017" + + +import unittest +import tempfile +import numpy +import shutil +from ..import rawh5 + + +class TestNumpyFile(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.tmpDirectory = tempfile.mkdtemp() + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.tmpDirectory) + + def testNumpyFile(self): + filename = "%s/%s.npy" % (self.tmpDirectory, self.id()) + c = numpy.random.rand(5, 5) + numpy.save(filename, c) + h5 = rawh5.NumpyFile(filename) + self.assertIn("data", h5) + self.assertEqual(h5["data"].dtype.kind, "f") + + def testNumpyZFile(self): + filename = "%s/%s.npz" % (self.tmpDirectory, self.id()) + a = numpy.array(u"aaaaa") + b = numpy.array([1, 2, 3, 4]) + c = numpy.random.rand(5, 5) + d = numpy.array(b"aaaaa") + e = numpy.array(u"i \u2661 my mother") + numpy.savez(filename, a, b=b, c=c, d=d, e=e) + h5 = rawh5.NumpyFile(filename) + self.assertIn("arr_0", h5) + self.assertIn("b", h5) + self.assertIn("c", h5) + self.assertIn("d", h5) + self.assertIn("e", h5) + self.assertEqual(h5["arr_0"].dtype.kind, "U") + self.assertEqual(h5["b"].dtype.kind, "i") + self.assertEqual(h5["c"].dtype.kind, "f") + self.assertEqual(h5["d"].dtype.kind, "S") + 
self.assertEqual(h5["e"].dtype.kind, "U") + + def testNumpyZFileContainingDirectories(self): + filename = "%s/%s.npz" % (self.tmpDirectory, self.id()) + data = {} + data['a/b/c'] = numpy.arange(10) + data['a/b/e'] = numpy.arange(10) + numpy.savez(filename, **data) + h5 = rawh5.NumpyFile(filename) + self.assertIn("a/b/c", h5) + self.assertIn("a/b/e", h5) diff --git a/src/silx/io/test/test_specfile.py b/src/silx/io/test/test_specfile.py new file mode 100644 index 0000000..44cb08c --- /dev/null +++ b/src/silx/io/test/test_specfile.py @@ -0,0 +1,420 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for specfile wrapper""" + +__authors__ = ["P. Knobel", "V.A. 
Sole"] +__license__ = "MIT" +__date__ = "17/01/2018" + + +import locale +import logging +import numpy +import os +import sys +import tempfile +import unittest + +from silx.utils import testutils + +from ..specfile import SpecFile, Scan +from .. import specfile + + +logger1 = logging.getLogger(__name__) + +sftext = """#F /tmp/sf.dat +#E 1455180875 +#D Thu Feb 11 09:54:35 2016 +#C imaging User = opid17 +#U00 user comment first line +#U01 This is a dummy file to test SpecFile parsing +#U02 +#U03 last line + +#O0 Pslit HGap MRTSlit UP MRTSlit DOWN +#O1 Sslit1 VOff Sslit1 HOff Sslit1 VGap +#o0 pshg mrtu mrtd +#o2 ss1vo ss1ho ss1vg + +#J0 Seconds IA ion.mono Current +#J1 xbpmc2 idgap1 Inorm + +#S 1 ascan ss1vo -4.55687 -0.556875 40 0.2 +#D Thu Feb 11 09:55:20 2016 +#T 0.2 (Seconds) +#G0 0 +#G1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#G3 0 0 0 0 0 0 0 0 0 +#G4 0 +#Q +#P0 180.005 -0.66875 0.87125 +#P1 14.74255 16.197579 12.238283 +#UMI0 Current AutoM Shutter +#UMI1 192.51 OFF FE open +#UMI2 Refill in 39883 sec, Fill Mode: uniform multibunch / Message: Feb 11 08:00 Delivery:Next Refill at 21:00; +#N 4 +#L first column second column 3rd_col +-1.23 5.89 8 +8.478100E+01 5 1.56 +3.14 2.73 -3.14 +1.2 2.3 3.4 + +#S 25 ascan c3th 1.33245 1.52245 40 0.15 +#D Thu Feb 11 10:00:31 2016 +#P0 80.005 -1.66875 1.87125 +#P1 4.74255 6.197579 2.238283 +#N 5 +#L column0 column1 col2 col3 +0.0 0.1 0.2 0.3 +1.0 1.1 1.2 1.3 +2.0 2.1 2.2 2.3 +3.0 3.1 3.2 3.3 + +#S 26 yyyyyy +#D Thu Feb 11 09:55:20 2016 +#P0 80.005 -1.66875 1.87125 +#P1 4.74255 6.197579 2.238283 +#N 4 +#L first column second column 3rd_col +#C Sat Oct 31 15:51:47 1998. Scan aborted after 0 points. 
+ +#F /tmp/sf.dat +#E 1455180876 +#D Thu Feb 11 09:54:36 2016 + +#S 1 aaaaaa +#U first duplicate line +#U second duplicate line +#@MCADEV 1 +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#N 3 +#L uno duo +1 2 +@A 0 1 2 +3 4 +@A 3.1 4 5 +5 6 +@A 6 7.7 8 +""" + + +loc = locale.getlocale(locale.LC_NUMERIC) +try: + locale.setlocale(locale.LC_NUMERIC, 'de_DE.utf8') +except locale.Error: + try_DE = False +else: + try_DE = True + locale.setlocale(locale.LC_NUMERIC, loc) + + +class TestSpecFile(unittest.TestCase): + @classmethod + def setUpClass(cls): + fd, cls.fname1 = tempfile.mkstemp(text=False) + if sys.version_info < (3, ): + os.write(fd, sftext) + else: + os.write(fd, bytes(sftext, 'ascii')) + os.close(fd) + + fd2, cls.fname2 = tempfile.mkstemp(text=False) + if sys.version_info < (3, ): + os.write(fd2, sftext[370:923]) + else: + os.write(fd2, bytes(sftext[370:923], 'ascii')) + os.close(fd2) + + fd3, cls.fname3 = tempfile.mkstemp(text=False) + txt = sftext[371:923] + if sys.version_info < (3, ): + os.write(fd3, txt) + else: + os.write(fd3, bytes(txt, 'ascii')) + os.close(fd3) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.fname1) + os.unlink(cls.fname2) + os.unlink(cls.fname3) + + def setUp(self): + self.sf = SpecFile(self.fname1) + self.scan1 = self.sf[0] + self.scan1_2 = self.sf["1.2"] + self.scan25 = self.sf["25.1"] + self.empty_scan = self.sf["26.1"] + + self.sf_no_fhdr = SpecFile(self.fname2) + self.scan1_no_fhdr = self.sf_no_fhdr[0] + + self.sf_no_fhdr_crash = SpecFile(self.fname3) + self.scan1_no_fhdr_crash = self.sf_no_fhdr_crash[0] + + def tearDown(self): + self.sf.close() + self.sf_no_fhdr.close() + self.sf_no_fhdr_crash.close() + + def test_open(self): + self.assertIsInstance(self.sf, SpecFile) + with self.assertRaises(specfile.SfErrFileOpen): + SpecFile("doesnt_exist.dat") + + # test filename types unicode and bytes + if sys.version_info[0] < 3: + try: + SpecFile(self.fname1) + except TypeError: + self.fail("failed to handle filename as python2 
str") + try: + SpecFile(unicode(self.fname1)) + except TypeError: + self.fail("failed to handle filename as python2 unicode") + else: + try: + SpecFile(self.fname1) + except TypeError: + self.fail("failed to handle filename as python3 str") + try: + SpecFile(bytes(self.fname1, 'utf-8')) + except TypeError: + self.fail("failed to handle filename as python3 bytes") + + def test_number_of_scans(self): + self.assertEqual(4, len(self.sf)) + + def test_list_of_scan_indices(self): + self.assertEqual(self.sf.list(), + [1, 25, 26, 1]) + self.assertEqual(self.sf.keys(), + ["1.1", "25.1", "26.1", "1.2"]) + + def test_index_number_order(self): + self.assertEqual(self.sf.index(1, 2), 3) # sf["1.2"]==sf[3] + self.assertEqual(self.sf.number(1), 25) # sf[1]==sf["25"] + self.assertEqual(self.sf.order(3), 2) # sf[3]==sf["1.2"] + with self.assertRaises(specfile.SfErrScanNotFound): + self.sf.index(3, 2) + with self.assertRaises(specfile.SfErrScanNotFound): + self.sf.index(99) + + def assertRaisesRegex(self, *args, **kwargs): + # Python 2 compatibility + if sys.version_info.major >= 3: + return super(TestSpecFile, self).assertRaisesRegex(*args, **kwargs) + else: + return self.assertRaisesRegexp(*args, **kwargs) + + def test_getitem(self): + self.assertIsInstance(self.sf[2], Scan) + self.assertIsInstance(self.sf["1.2"], Scan) + # int out of range + with self.assertRaisesRegex(IndexError, 'Scan index must be in ran'): + self.sf[107] + # float indexing not allowed + with self.assertRaisesRegex(TypeError, 'The scan identification k'): + self.sf[1.2] + # non existant scan with "N.M" indexing + with self.assertRaises(KeyError): + self.sf["3.2"] + + def test_specfile_iterator(self): + i = 0 + for scan in self.sf: + if i == 1: + self.assertEqual(scan.motor_positions, + self.sf[1].motor_positions) + i += 1 + # number of returned scans + self.assertEqual(i, len(self.sf)) + + def test_scan_index(self): + self.assertEqual(self.scan1.index, 0) + self.assertEqual(self.scan1_2.index, 3) + 
self.assertEqual(self.scan25.index, 1) + + def test_scan_headers(self): + self.assertEqual(self.scan25.scan_header_dict['S'], + "25 ascan c3th 1.33245 1.52245 40 0.15") + self.assertEqual(self.scan1.header[17], '#G0 0') + self.assertEqual(len(self.scan1.header), 29) + # parsing headers with long keys + self.assertEqual(self.scan1.scan_header_dict['UMI0'], + 'Current AutoM Shutter') + # parsing empty headers + self.assertEqual(self.scan1.scan_header_dict['Q'], '') + # duplicate headers: concatenated (with newline) + self.assertEqual(self.scan1_2.scan_header_dict["U"], + "first duplicate line\nsecond duplicate line") + + def test_file_headers(self): + self.assertEqual(self.scan1.header[1], + '#E 1455180875') + self.assertEqual(self.scan1.file_header_dict['F'], + '/tmp/sf.dat') + + def test_multiple_file_headers(self): + """Scan 1.2 is after the second file header, with a different + Epoch""" + self.assertEqual(self.scan1_2.header[1], + '#E 1455180876') + + def test_scan_labels(self): + self.assertEqual(self.scan1.labels, + ['first column', 'second column', '3rd_col']) + + def test_data(self): + # data_line() and data_col() take 1-based indices as arg + self.assertAlmostEqual(self.scan1.data_line(1)[2], + 1.56) + # tests for data transposition between original file and .data attr + self.assertAlmostEqual(self.scan1.data[2, 0], + 8) + self.assertEqual(self.scan1.data.shape, (3, 4)) + self.assertAlmostEqual(numpy.sum(self.scan1.data), 113.631) + + def test_data_column_by_name(self): + self.assertAlmostEqual(self.scan25.data_column_by_name("col2")[1], + 1.2) + # Scan.data is transposed after readinq, so column is the first index + self.assertAlmostEqual(numpy.sum(self.scan25.data_column_by_name("col2")), + numpy.sum(self.scan25.data[2, :])) + with self.assertRaises(specfile.SfErrColNotFound): + self.scan25.data_column_by_name("ygfxgfyxg") + + def test_motors(self): + self.assertEqual(len(self.scan1.motor_names), 6) + self.assertEqual(len(self.scan1.motor_positions), 6) + 
self.assertAlmostEqual(sum(self.scan1.motor_positions), + 223.385912) + self.assertEqual(self.scan1.motor_names[1], 'MRTSlit UP') + self.assertAlmostEqual( + self.scan25.motor_position_by_name('MRTSlit UP'), + -1.66875) + + def test_absence_of_file_header(self): + """We expect Scan.file_header to be an empty list in the absence + of a file header. + """ + self.assertEqual(len(self.scan1_no_fhdr.motor_names), 0) + # motor positions can still be read in the scan header + # even in the absence of motor names + self.assertAlmostEqual(sum(self.scan1_no_fhdr.motor_positions), + 223.385912) + self.assertEqual(len(self.scan1_no_fhdr.header), 15) + self.assertEqual(len(self.scan1_no_fhdr.scan_header), 15) + self.assertEqual(len(self.scan1_no_fhdr.file_header), 0) + + def test_crash_absence_of_file_header(self): + """Test no crash in absence of file header and no leading newline + character + """ + self.assertEqual(len(self.scan1_no_fhdr_crash.motor_names), 0) + # motor positions can still be read in the scan header + # even in the absence of motor names + self.assertAlmostEqual(sum(self.scan1_no_fhdr_crash.motor_positions), + 223.385912) + self.assertEqual(len(self.scan1_no_fhdr_crash.scan_header), 15) + self.assertEqual(len(self.scan1_no_fhdr_crash.file_header), 0) + + def test_mca(self): + self.assertEqual(len(self.scan1.mca), 0) + self.assertEqual(len(self.scan1_2.mca), 3) + self.assertEqual(self.scan1_2.mca[1][2], 5) + self.assertEqual(sum(self.scan1_2.mca[2]), 21.7) + + # Negative indexing + self.assertEqual(sum(self.scan1_2.mca[len(self.scan1_2.mca) - 1]), + sum(self.scan1_2.mca[-1])) + + # Test iterator + line_count, total_sum = (0, 0) + for mca_line in self.scan1_2.mca: + line_count += 1 + total_sum += sum(mca_line) + self.assertEqual(line_count, 3) + self.assertAlmostEqual(total_sum, 36.8) + + def test_mca_header(self): + self.assertEqual(self.scan1.mca_header_dict, {}) + self.assertEqual(len(self.scan1_2.mca_header_dict), 4) + 
self.assertEqual(self.scan1_2.mca_header_dict["CALIB"], "1 2 3") + self.assertEqual(self.scan1_2.mca.calibration, + [[1., 2., 3.]]) + # default calib in the absence of #@CALIB + self.assertEqual(self.scan25.mca.calibration, + [[0., 1., 0.]]) + self.assertEqual(self.scan1_2.mca.channels, + [[0, 1, 2]]) + # absence of #@CHANN and spectra + self.assertEqual(self.scan25.mca.channels, + []) + + @testutils.validate_logging(specfile._logger.name, warning=1) + def test_empty_scan(self): + """Test reading a scan with no data points""" + self.assertEqual(len(self.empty_scan.labels), + 3) + col1 = self.empty_scan.data_column_by_name("second column") + self.assertEqual(col1.shape, (0, )) + + +class TestSFLocale(unittest.TestCase): + @classmethod + def setUpClass(cls): + fd, cls.fname = tempfile.mkstemp(text=False) + if sys.version_info < (3, ): + os.write(fd, sftext) + else: + os.write(fd, bytes(sftext, 'ascii')) + os.close(fd) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.fname) + locale.setlocale(locale.LC_NUMERIC, loc) # restore saved locale + + def crunch_data(self): + self.sf3 = SpecFile(self.fname) + self.assertAlmostEqual(self.sf3[0].data_line(1)[2], + 1.56) + self.sf3.close() + + @unittest.skipIf(not try_DE, "de_DE.utf8 locale not installed") + def test_locale_de_DE(self): + locale.setlocale(locale.LC_NUMERIC, 'de_DE.utf8') + self.crunch_data() + + def test_locale_user(self): + locale.setlocale(locale.LC_NUMERIC, '') # use user's preferred locale + self.crunch_data() + + def test_locale_C(self): + locale.setlocale(locale.LC_NUMERIC, 'C') # use default (C) locale + self.crunch_data() diff --git a/src/silx/io/test/test_specfilewrapper.py b/src/silx/io/test/test_specfilewrapper.py new file mode 100644 index 0000000..a1ba5f4 --- /dev/null +++ b/src/silx/io/test/test_specfilewrapper.py @@ -0,0 +1,195 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016 European Synchrotron Radiation Facility 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for old specfile wrapper""" + +__authors__ = ["P. 
Knobel"] +__license__ = "MIT" +__date__ = "15/05/2017" + +import locale +import logging +import numpy +import os +import sys +import tempfile +import unittest + +logger1 = logging.getLogger(__name__) + +from ..specfilewrapper import Specfile + +sftext = """#F /tmp/sf.dat +#E 1455180875 +#D Thu Feb 11 09:54:35 2016 +#C imaging User = opid17 +#U00 user comment first line +#U01 This is a dummy file to test SpecFile parsing +#U02 +#U03 last line + +#O0 Pslit HGap MRTSlit UP MRTSlit DOWN +#O1 Sslit1 VOff Sslit1 HOff Sslit1 VGap +#o0 pshg mrtu mrtd +#o2 ss1vo ss1ho ss1vg + +#J0 Seconds IA ion.mono Current +#J1 xbpmc2 idgap1 Inorm + +#S 1 ascan ss1vo -4.55687 -0.556875 40 0.2 +#D Thu Feb 11 09:55:20 2016 +#T 0.2 (Seconds) +#G0 0 +#G1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#G3 0 0 0 0 0 0 0 0 0 +#G4 0 +#Q +#P0 180.005 -0.66875 0.87125 +#P1 14.74255 16.197579 12.238283 +#UMI0 Current AutoM Shutter +#UMI1 192.51 OFF FE open +#UMI2 Refill in 39883 sec, Fill Mode: uniform multibunch / Message: Feb 11 08:00 Delivery:Next Refill at 21:00; +#N 4 +#L first column second column 3rd_col +-1.23 5.89 8 +8.478100E+01 5 1.56 +3.14 2.73 -3.14 +1.2 2.3 3.4 + +#S 25 ascan c3th 1.33245 1.52245 40 0.15 +#D Thu Feb 11 10:00:31 2016 +#P0 80.005 -1.66875 1.87125 +#P1 4.74255 6.197579 2.238283 +#N 5 +#L column0 column1 col2 col3 +0.0 0.1 0.2 0.3 +1.0 1.1 1.2 1.3 +2.0 2.1 2.2 2.3 +3.0 3.1 3.2 3.3 + +#F /tmp/sf.dat +#E 1455180876 +#D Thu Feb 11 09:54:36 2016 + +#S 1 aaaaaa +#U first duplicate line +#U second duplicate line +#@MCADEV 1 +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#N 3 +#L uno duo +1 2 +@A 0 1 2 +3 4 +@A 3.1 4 5 +5 6 +@A 6 7.7 8 +""" + + +class TestSpecfilewrapper(unittest.TestCase): + @classmethod + def setUpClass(cls): + fd, cls.fname1 = tempfile.mkstemp(text=False) + if sys.version_info < (3, ): + os.write(fd, sftext) + else: + os.write(fd, bytes(sftext, 'ascii')) + os.close(fd) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.fname1) + + def 
setUp(self): + self.sf = Specfile(self.fname1) + self.scan1 = self.sf[0] + self.scan1_2 = self.sf.select("1.2") + self.scan25 = self.sf.select("25.1") + + def tearDown(self): + self.sf.close() + + def test_number_of_scans(self): + self.assertEqual(3, len(self.sf)) + + def test_list_of_scan_indices(self): + self.assertEqual(self.sf.list(), + '1,25,1') + self.assertEqual(self.sf.keys(), + ["1.1", "25.1", "1.2"]) + + def test_scan_headers(self): + self.assertEqual(self.scan25.header('S'), + ["#S 25 ascan c3th 1.33245 1.52245 40 0.15"]) + self.assertEqual(self.scan1.header("G0"), ['#G0 0']) + # parsing headers with long keys + # parsing empty headers + self.assertEqual(self.scan1.header('Q'), ['#Q ']) + + def test_file_headers(self): + self.assertEqual(self.scan1.header("E"), + ['#E 1455180875']) + self.assertEqual(self.sf.title(), + "imaging") + self.assertEqual(self.sf.epoch(), + 1455180875) + self.assertEqual(self.sf.allmotors(), + ["Pslit HGap", "MRTSlit UP", "MRTSlit DOWN", + "Sslit1 VOff", "Sslit1 HOff", "Sslit1 VGap"]) + + def test_scan_labels(self): + self.assertEqual(self.scan1.alllabels(), + ['first column', 'second column', '3rd_col']) + + def test_data(self): + self.assertAlmostEqual(self.scan1.dataline(3)[2], + -3.14) + self.assertAlmostEqual(self.scan1.datacol(1)[2], + 3.14) + # tests for data transposition between original file and .data attr + self.assertAlmostEqual(self.scan1.data()[2, 0], + 8) + self.assertEqual(self.scan1.data().shape, (3, 4)) + self.assertAlmostEqual(numpy.sum(self.scan1.data()), 113.631) + + def test_date(self): + self.assertEqual(self.scan1.date(), + "Thu Feb 11 09:55:20 2016") + + def test_motors(self): + self.assertEqual(len(self.sf.allmotors()), 6) + self.assertEqual(len(self.scan1.allmotorpos()), 6) + self.assertAlmostEqual(sum(self.scan1.allmotorpos()), + 223.385912) + self.assertEqual(self.sf.allmotors()[1], 'MRTSlit UP') + + def test_mca(self): + self.assertEqual(self.scan1_2.mca(2)[2], 5) + 
self.assertEqual(sum(self.scan1_2.mca(3)), 21.7) + + def test_mca_header(self): + self.assertEqual(self.scan1_2.header("CALIB"), + ["#@CALIB 1 2 3"]) diff --git a/src/silx/io/test/test_spech5.py b/src/silx/io/test/test_spech5.py new file mode 100644 index 0000000..1e67961 --- /dev/null +++ b/src/silx/io/test/test_spech5.py @@ -0,0 +1,929 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2021 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for spech5""" +import numpy +import os +import io +import sys +import tempfile +import unittest +import datetime +from functools import partial + +from silx.utils import testutils + +from .. import spech5 +from ..spech5 import (SpecH5, SpecH5Dataset, spec_date_to_iso8601) +from .. 
import specfile + +import h5py + +__authors__ = ["P. Knobel"] +__license__ = "MIT" +__date__ = "12/02/2018" + +sftext = """#F /tmp/sf.dat +#E 1455180875 +#D Thu Feb 11 09:54:35 2016 +#C imaging User = opid17 +#O0 Pslit HGap MRTSlit UP MRTSlit DOWN +#O1 Sslit1 VOff Sslit1 HOff Sslit1 VGap +#o0 pshg mrtu mrtd +#o2 ss1vo ss1ho ss1vg + +#J0 Seconds IA ion.mono Current +#J1 xbpmc2 idgap1 Inorm + +#S 1 ascan ss1vo -4.55687 -0.556875 40 0.2 +#D Thu Feb 11 09:55:20 2016 +#T 0.2 (Seconds) +#P0 180.005 -0.66875 0.87125 +#P1 14.74255 16.197579 12.238283 +#N 4 +#L MRTSlit UP second column 3rd_col +-1.23 5.89 8 +8.478100E+01 5 1.56 +3.14 2.73 -3.14 +1.2 2.3 3.4 + +#S 25 ascan c3th 1.33245 1.52245 40 0.15 +#D Sat 2015/03/14 03:53:50 +#P0 80.005 -1.66875 1.87125 +#P1 4.74255 6.197579 2.238283 +#N 5 +#L column0 column1 col2 col3 +0.0 0.1 0.2 0.3 +1.0 1.1 1.2 1.3 +2.0 2.1 2.2 2.3 +3.0 3.1 3.2 3.3 + +#S 1 aaaaaa +#D Thu Feb 11 10:00:32 2016 +#@MCADEV 1 +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#@CTIME 123.4 234.5 345.6 +#N 3 +#L uno duo +1 2 +@A 0 1 2 +@A 10 9 8 +@A 1 1 1.1 +3 4 +@A 3.1 4 5 +@A 7 6 5 +@A 1 1 1 +5 6 +@A 6 7.7 8 +@A 4 3 2 +@A 1 1 1 + +#S 1000 bbbbb +#G1 3.25 3.25 5.207 90 90 120 2.232368448 2.232368448 1.206680489 90 90 60 1 1 2 -1 2 2 26.132 7.41 -88.96 1.11 1.000012861 15.19 26.06 67.355 -88.96 1.11 1.000012861 15.11 0.723353 0.723353 +#G3 0.0106337923671 0.027529133 1.206191273 -1.43467075 0.7633438883 0.02401568018 -1.709143587 -2.097621783 0.02456954971 +#L a b +1 2 + +#S 1001 ccccc +#G1 0. 0. 0. 0 0 0 2.232368448 2.232368448 1.206680489 90 90 60 1 1 2 -1 2 2 26.132 7.41 -88.96 1.11 1.000012861 15.19 26.06 67.355 -88.96 1.11 1.000012861 15.11 0.723353 0.723353 +#G3 0. 0. 0. 0. 0.0 0. 0. 0. 0. +#L a b +1 2 + +""" + + +class TestSpecDate(unittest.TestCase): + """ + Test of the spec_date_to_iso8601 function. 
+ """ + # TODO : time zone tests + # TODO : error cases + + @classmethod + def setUpClass(cls): + import locale + # FYI : not threadsafe + cls.locale_saved = locale.setlocale(locale.LC_TIME) + locale.setlocale(locale.LC_TIME, 'C') + + @classmethod + def tearDownClass(cls): + import locale + # FYI : not threadsafe + locale.setlocale(locale.LC_TIME, cls.locale_saved) + + def setUp(self): + # covering all week days + self.n_days = range(1, 10) + # covering all months + self.n_months = range(1, 13) + + self.n_years = [1999, 2016, 2020] + self.n_seconds = [0, 5, 26, 59] + self.n_minutes = [0, 9, 42, 59] + self.n_hours = [0, 2, 17, 23] + + self.formats = ['%a %b %d %H:%M:%S %Y', '%a %Y/%m/%d %H:%M:%S'] + + self.check_date_formats = partial(self.__check_date_formats, + year=self.n_years[0], + month=self.n_months[0], + day=self.n_days[0], + hour=self.n_hours[0], + minute=self.n_minutes[0], + second=self.n_seconds[0], + msg=None) + + def __check_date_formats(self, + year, + month, + day, + hour, + minute, + second, + msg=None): + dt = datetime.datetime(year, month, day, hour, minute, second) + expected_date = dt.isoformat() + + for i_fmt, fmt in enumerate(self.formats): + spec_date = dt.strftime(fmt) + iso_date = spec_date_to_iso8601(spec_date) + self.assertEqual(iso_date, + expected_date, + msg='Testing {0}. format={1}. ' + 'Expected "{2}", got "{3} ({4})" (dt={5}).' 
+ ''.format(msg, + i_fmt, + expected_date, + iso_date, + spec_date, + dt)) + + def testYearsNominal(self): + for year in self.n_years: + self.check_date_formats(year=year, msg='year') + + def testMonthsNominal(self): + for month in self.n_months: + self.check_date_formats(month=month, msg='month') + + def testDaysNominal(self): + for day in self.n_days: + self.check_date_formats(day=day, msg='day') + + def testHoursNominal(self): + for hour in self.n_hours: + self.check_date_formats(hour=hour, msg='hour') + + def testMinutesNominal(self): + for minute in self.n_minutes: + self.check_date_formats(minute=minute, msg='minute') + + def testSecondsNominal(self): + for second in self.n_seconds: + self.check_date_formats(second=second, msg='second') + + +class TestSpecH5(unittest.TestCase): + @classmethod + def setUpClass(cls): + fd, cls.fname = tempfile.mkstemp() + if sys.version_info < (3, ): + os.write(fd, sftext) + else: + os.write(fd, bytes(sftext, 'ascii')) + os.close(fd) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.fname) + + def setUp(self): + self.sfh5 = SpecH5(self.fname) + + def tearDown(self): + self.sfh5.close() + + def testContainsFile(self): + self.assertIn("/1.2/measurement", self.sfh5) + self.assertIn("/25.1", self.sfh5) + self.assertIn("25.1", self.sfh5) + self.assertNotIn("25.2", self.sfh5) + # measurement is a child of a scan, full path would be required to + # access from root level + self.assertNotIn("measurement", self.sfh5) + # Groups may or may not have a trailing / + self.assertIn("/1.2/measurement/mca_1/", self.sfh5) + self.assertIn("/1.2/measurement/mca_1", self.sfh5) + # Datasets can't have a trailing / + self.assertNotIn("/1.2/measurement/mca_0/info/calibration/ ", self.sfh5) + # No mca_8 + self.assertNotIn("/1.2/measurement/mca_8/info/calibration", self.sfh5) + # Link + self.assertIn("/1.2/measurement/mca_0/info/calibration", self.sfh5) + + def testContainsGroup(self): + self.assertIn("measurement", self.sfh5["/1.2/"]) + 
self.assertIn("measurement", self.sfh5["/1.2"]) + self.assertIn("25.1", self.sfh5["/"]) + self.assertNotIn("25.2", self.sfh5["/"]) + self.assertIn("instrument/positioners/Sslit1 HOff", self.sfh5["/1.1"]) + # illegal trailing "/" after dataset name + self.assertNotIn("instrument/positioners/Sslit1 HOff/", + self.sfh5["/1.1"]) + # full path to element in group (OK) + self.assertIn("/1.1/instrument/positioners/Sslit1 HOff", + self.sfh5["/1.1/instrument"]) + + def testDataColumn(self): + self.assertAlmostEqual(sum(self.sfh5["/1.2/measurement/duo"]), + 12.0) + self.assertAlmostEqual( + sum(self.sfh5["1.1"]["measurement"]["MRTSlit UP"]), + 87.891, places=4) + + def testDate(self): + # start time is in Iso8601 format + self.assertEqual(self.sfh5["/1.1/start_time"], + u"2016-02-11T09:55:20") + self.assertEqual(self.sfh5["25.1/start_time"], + u"2015-03-14T03:53:50") + + def assertRaisesRegex(self, *args, **kwargs): + # Python 2 compatibility + if sys.version_info.major >= 3: + return super(TestSpecH5, self).assertRaisesRegex(*args, **kwargs) + else: + return self.assertRaisesRegexp(*args, **kwargs) + + def testDatasetInstanceAttr(self): + """The SpecH5Dataset objects must implement some dummy attributes + to improve compatibility with widgets dealing with h5py datasets.""" + self.assertIsNone(self.sfh5["/1.1/start_time"].compression) + self.assertIsNone(self.sfh5["1.1"]["measurement"]["MRTSlit UP"].chunks) + + # error message must be explicit + with self.assertRaisesRegex( + AttributeError, + "SpecH5Dataset has no attribute tOTo"): + dummy = self.sfh5["/1.1/start_time"].tOTo + + def testGet(self): + """Test :meth:`SpecH5Group.get`""" + # default value of param *default* is None + self.assertIsNone(self.sfh5.get("toto")) + self.assertEqual(self.sfh5["25.1"].get("toto", default=-3), + -3) + + self.assertEqual(self.sfh5.get("/1.1/start_time", default=-3), + u"2016-02-11T09:55:20") + + def testGetClass(self): + """Test :meth:`SpecH5Group.get`""" + 
self.assertIs(self.sfh5["1.1"].get("start_time", getclass=True), + h5py.Dataset) + self.assertIs(self.sfh5["1.1"].get("instrument", getclass=True), + h5py.Group) + + # spech5 does not define external link, so there is no way + # a group can *get* a SpecH5 class + + def testGetApi(self): + result = self.sfh5.get("1.1", getclass=True, getlink=True) + self.assertIs(result, h5py.HardLink) + result = self.sfh5.get("1.1", getclass=False, getlink=True) + self.assertIsInstance(result, h5py.HardLink) + result = self.sfh5.get("1.1", getclass=True, getlink=False) + self.assertIs(result, h5py.Group) + result = self.sfh5.get("1.1", getclass=False, getlink=False) + self.assertIsInstance(result, spech5.SpecH5Group) + + def testGetItemGroup(self): + group = self.sfh5["25.1"]["instrument"] + self.assertEqual(list(group["positioners"].keys()), + ["Pslit HGap", "MRTSlit UP", "MRTSlit DOWN", + "Sslit1 VOff", "Sslit1 HOff", "Sslit1 VGap"]) + with self.assertRaises(KeyError): + group["Holy Grail"] + + def testGetitemSpecH5(self): + self.assertEqual(self.sfh5["/1.2/instrument/positioners"], + self.sfh5["1.2"]["instrument"]["positioners"]) + + def testH5pyClass(self): + """Test :attr:`h5py_class` returns the corresponding h5py class + (h5py.File, h5py.Group, h5py.Dataset)""" + a_file = self.sfh5 + self.assertIs(a_file.h5py_class, + h5py.File) + + a_group = self.sfh5["/1.2/measurement"] + self.assertIs(a_group.h5py_class, + h5py.Group) + + a_dataset = self.sfh5["/1.1/instrument/positioners/Sslit1 HOff"] + self.assertIs(a_dataset.h5py_class, + h5py.Dataset) + + def testHeader(self): + file_header = self.sfh5["/1.2/instrument/specfile/file_header"] + scan_header = self.sfh5["/1.2/instrument/specfile/scan_header"] + + # File header has 10 lines + self.assertEqual(len(file_header), 10) + # 1.2 has 9 scan & mca header lines + self.assertEqual(len(scan_header), 9) + + # line 4 of file header + self.assertEqual( + file_header[3], + u"#C imaging User = opid17") + # line 4 of scan header + 
scan_header = self.sfh5["25.1/instrument/specfile/scan_header"] + + self.assertEqual( + scan_header[3], + u"#P1 4.74255 6.197579 2.238283") + + def testLinks(self): + self.assertTrue(numpy.array_equal( + self.sfh5["/1.2/measurement/mca_0/data"], + self.sfh5["/1.2/instrument/mca_0/data"]) + ) + self.assertTrue(numpy.array_equal( + self.sfh5["/1.2/measurement/mca_0/info/data"], + self.sfh5["/1.2/instrument/mca_0/data"]) + ) + self.assertTrue(numpy.array_equal( + self.sfh5["/1.2/measurement/mca_0/info/channels"], + self.sfh5["/1.2/instrument/mca_0/channels"]) + ) + self.assertEqual(self.sfh5["/1.2/measurement/mca_0/info/"].keys(), + self.sfh5["/1.2/instrument/mca_0/"].keys()) + + self.assertEqual(self.sfh5["/1.2/measurement/mca_0/info/preset_time"], + self.sfh5["/1.2/instrument/mca_0/preset_time"]) + self.assertEqual(self.sfh5["/1.2/measurement/mca_0/info/live_time"], + self.sfh5["/1.2/instrument/mca_0/live_time"]) + self.assertEqual(self.sfh5["/1.2/measurement/mca_0/info/elapsed_time"], + self.sfh5["/1.2/instrument/mca_0/elapsed_time"]) + + def testListScanIndices(self): + self.assertEqual(list(self.sfh5.keys()), + ["1.1", "25.1", "1.2", "1000.1", "1001.1"]) + self.assertEqual(self.sfh5["1.2"].attrs, + {"NX_class": "NXentry", }) + + def testMcaAbsent(self): + def access_absent_mca(): + """This must raise a KeyError, because scan 1.1 has no MCA""" + return self.sfh5["/1.1/measurement/mca_0/"] + self.assertRaises(KeyError, access_absent_mca) + + def testMcaCalib(self): + mca0_calib = self.sfh5["/1.2/measurement/mca_0/info/calibration"] + mca1_calib = self.sfh5["/1.2/measurement/mca_1/info/calibration"] + self.assertEqual(mca0_calib.tolist(), + [1, 2, 3]) + # calibration is unique in this scan and applies to all analysers + self.assertEqual(mca0_calib.tolist(), + mca1_calib.tolist()) + + def testMcaChannels(self): + mca0_chann = self.sfh5["/1.2/measurement/mca_0/info/channels"] + mca1_chann = self.sfh5["/1.2/measurement/mca_1/info/channels"] + 
self.assertEqual(mca0_chann.tolist(), + [0, 1, 2]) + self.assertEqual(mca0_chann.tolist(), + mca1_chann.tolist()) + + def testMcaCtime(self): + """Tests for #@CTIME mca header""" + datasets = ["preset_time", "live_time", "elapsed_time"] + for ds in datasets: + self.assertNotIn("/1.1/instrument/mca_0/" + ds, self.sfh5) + self.assertIn("/1.2/instrument/mca_0/" + ds, self.sfh5) + + mca0_preset_time = self.sfh5["/1.2/instrument/mca_0/preset_time"] + mca1_preset_time = self.sfh5["/1.2/instrument/mca_1/preset_time"] + self.assertLess(mca0_preset_time - 123.4, + 10**-5) + # ctime is unique in a this scan and applies to all analysers + self.assertEqual(mca0_preset_time, + mca1_preset_time) + + mca0_live_time = self.sfh5["/1.2/instrument/mca_0/live_time"] + mca1_live_time = self.sfh5["/1.2/instrument/mca_1/live_time"] + self.assertLess(mca0_live_time - 234.5, + 10**-5) + self.assertEqual(mca0_live_time, + mca1_live_time) + + mca0_elapsed_time = self.sfh5["/1.2/instrument/mca_0/elapsed_time"] + mca1_elapsed_time = self.sfh5["/1.2/instrument/mca_1/elapsed_time"] + self.assertLess(mca0_elapsed_time - 345.6, + 10**-5) + self.assertEqual(mca0_elapsed_time, + mca1_elapsed_time) + + def testMcaData(self): + # sum 1st MCA in scan 1.2 over rows + mca_0_data = self.sfh5["/1.2/measurement/mca_0/data"] + for summed_row, expected in zip(mca_0_data.sum(axis=1).tolist(), + [3.0, 12.1, 21.7]): + self.assertAlmostEqual(summed_row, expected, places=4) + + # sum 3rd MCA in scan 1.2 along both axis + mca_2_data = self.sfh5["1.2"]["measurement"]["mca_2"]["data"] + self.assertAlmostEqual(sum(sum(mca_2_data)), 9.1, places=5) + # attrs + self.assertEqual(mca_0_data.attrs, {"interpretation": "spectrum"}) + + def testMotorPosition(self): + positioners_group = self.sfh5["/1.1/instrument/positioners"] + # MRTSlit DOWN position is defined in #P0 san header line + self.assertAlmostEqual(float(positioners_group["MRTSlit DOWN"]), + 0.87125) + # MRTSlit UP position is defined in first data column + for a, 
b in zip(positioners_group["MRTSlit UP"].tolist(), + [-1.23, 8.478100E+01, 3.14, 1.2]): + self.assertAlmostEqual(float(a), b, places=4) + + def testNumberMcaAnalysers(self): + """Scan 1.2 has 2 data columns + 3 mca spectra per data line.""" + self.assertEqual(len(self.sfh5["1.2"]["measurement"]), 5) + + def testTitle(self): + self.assertEqual(self.sfh5["/25.1/title"], + u"ascan c3th 1.33245 1.52245 40 0.15") + + def testValues(self): + group = self.sfh5["/25.1"] + self.assertTrue(hasattr(group, "values")) + self.assertTrue(callable(group.values)) + self.assertIn(self.sfh5["/25.1/title"], + self.sfh5["/25.1"].values()) + + # visit and visititems ignore links + def testVisit(self): + name_list = [] + self.sfh5.visit(name_list.append) + self.assertIn('1.2/instrument/positioners/Pslit HGap', name_list) + self.assertIn("1.2/instrument/specfile/scan_header", name_list) + self.assertEqual(len(name_list), 117) + + # test also visit of a subgroup, with various group name formats + name_list_leading_and_trailing_slash = [] + self.sfh5['/1.2/instrument/'].visit(name_list_leading_and_trailing_slash.append) + name_list_leading_slash = [] + self.sfh5['/1.2/instrument'].visit(name_list_leading_slash.append) + name_list_trailing_slash = [] + self.sfh5['1.2/instrument/'].visit(name_list_trailing_slash.append) + name_list_no_slash = [] + self.sfh5['1.2/instrument'].visit(name_list_no_slash.append) + + # no differences expected in the output names + self.assertEqual(name_list_leading_and_trailing_slash, + name_list_leading_slash) + self.assertEqual(name_list_leading_slash, + name_list_trailing_slash) + self.assertEqual(name_list_leading_slash, + name_list_no_slash) + self.assertIn("positioners/Pslit HGap", name_list_no_slash) + self.assertIn("positioners", name_list_no_slash) + + def testVisitItems(self): + dataset_name_list = [] + + def func_generator(l): + """return a function appending names to list l""" + def func(name, obj): + if isinstance(obj, SpecH5Dataset): + l.append(name) 
+ return func + + self.sfh5.visititems(func_generator(dataset_name_list)) + self.assertIn('1.2/instrument/positioners/Pslit HGap', dataset_name_list) + self.assertEqual(len(dataset_name_list), 85) + + # test also visit of a subgroup, with various group name formats + name_list_leading_and_trailing_slash = [] + self.sfh5['/1.2/instrument/'].visititems(func_generator(name_list_leading_and_trailing_slash)) + name_list_leading_slash = [] + self.sfh5['/1.2/instrument'].visititems(func_generator(name_list_leading_slash)) + name_list_trailing_slash = [] + self.sfh5['1.2/instrument/'].visititems(func_generator(name_list_trailing_slash)) + name_list_no_slash = [] + self.sfh5['1.2/instrument'].visititems(func_generator(name_list_no_slash)) + + # no differences expected in the output names + self.assertEqual(name_list_leading_and_trailing_slash, + name_list_leading_slash) + self.assertEqual(name_list_leading_slash, + name_list_trailing_slash) + self.assertEqual(name_list_leading_slash, + name_list_no_slash) + self.assertIn("positioners/Pslit HGap", name_list_no_slash) + + def testNotSpecH5(self): + fd, fname = tempfile.mkstemp() + os.write(fd, b"Not a spec file!") + os.close(fd) + self.assertRaises(specfile.SfErrFileOpen, SpecH5, fname) + self.assertRaises(IOError, SpecH5, fname) + os.unlink(fname) + + def testSample(self): + self.assertNotIn("sample", self.sfh5["/1.1"]) + self.assertIn("sample", self.sfh5["/1000.1"]) + self.assertIn("ub_matrix", self.sfh5["/1000.1/sample"]) + self.assertIn("unit_cell", self.sfh5["/1000.1/sample"]) + self.assertIn("unit_cell_abc", self.sfh5["/1000.1/sample"]) + self.assertIn("unit_cell_alphabetagamma", self.sfh5["/1000.1/sample"]) + + # All 0 values + self.assertNotIn("sample", self.sfh5["/1001.1"]) + with self.assertRaises(KeyError): + self.sfh5["/1001.1/sample/unit_cell"] + + @testutils.validate_logging(spech5.logger1.name, warning=2) + def testOpenFileDescriptor(self): + """Open a SpecH5 file from a file descriptor""" + with 
io.open(self.sfh5.filename) as f: + sfh5 = SpecH5(f) + self.assertIsNotNone(sfh5) + name_list = [] + # check if the object is working + self.sfh5.visit(name_list.append) + sfh5.close() + + +sftext_multi_mca_headers = """ +#S 1 aaaaaa +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#@CTIME 123.4 234.5 345.6 +#@MCA %16C +#@CHANN 3 1 3 1 +#@CALIB 5.5 6.6 7.7 +#@CTIME 10 11 12 +#N 3 +#L uno duo +1 2 +@A 0 1 2 +@A 10 9 8 +3 4 +@A 3.1 4 5 +@A 7 6 5 +5 6 +@A 6 7.7 8 +@A 4 3 2 + +""" + + +class TestSpecH5MultiMca(unittest.TestCase): + @classmethod + def setUpClass(cls): + fd, cls.fname = tempfile.mkstemp(text=False) + if sys.version_info < (3, ): + os.write(fd, sftext_multi_mca_headers) + else: + os.write(fd, bytes(sftext_multi_mca_headers, 'ascii')) + os.close(fd) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.fname) + + def setUp(self): + self.sfh5 = SpecH5(self.fname) + + def tearDown(self): + self.sfh5.close() + + def testMcaCalib(self): + mca0_calib = self.sfh5["/1.1/measurement/mca_0/info/calibration"] + mca1_calib = self.sfh5["/1.1/measurement/mca_1/info/calibration"] + self.assertEqual(mca0_calib.tolist(), + [1, 2, 3]) + self.assertAlmostEqual(sum(mca1_calib.tolist()), + sum([5.5, 6.6, 7.7]), + places=5) + + def testMcaChannels(self): + mca0_chann = self.sfh5["/1.1/measurement/mca_0/info/channels"] + mca1_chann = self.sfh5["/1.1/measurement/mca_1/info/channels"] + self.assertEqual(mca0_chann.tolist(), + [0., 1., 2.]) + # @CHANN is unique in this scan and applies to all analysers + self.assertEqual(mca1_chann.tolist(), + [1., 2., 3.]) + + def testMcaCtime(self): + """Tests for #@CTIME mca header""" + mca0_preset_time = self.sfh5["/1.1/instrument/mca_0/preset_time"] + mca1_preset_time = self.sfh5["/1.1/instrument/mca_1/preset_time"] + self.assertLess(mca0_preset_time - 123.4, + 10**-5) + self.assertLess(mca1_preset_time - 10, + 10**-5) + + mca0_live_time = self.sfh5["/1.1/instrument/mca_0/live_time"] + mca1_live_time = 
self.sfh5["/1.1/instrument/mca_1/live_time"] + self.assertLess(mca0_live_time - 234.5, + 10**-5) + self.assertLess(mca1_live_time - 11, + 10**-5) + + mca0_elapsed_time = self.sfh5["/1.1/instrument/mca_0/elapsed_time"] + mca1_elapsed_time = self.sfh5["/1.1/instrument/mca_1/elapsed_time"] + self.assertLess(mca0_elapsed_time - 345.6, + 10**-5) + self.assertLess(mca1_elapsed_time - 12, + 10**-5) + + +sftext_no_cols = r"""#F C:/DATA\test.mca +#D Thu Jul 7 08:40:19 2016 + +#S 1 31oct98.dat 22.1 If4 +#D Thu Jul 7 08:40:19 2016 +#C no data cols, one mca analyser, single spectrum +#@MCA %16C +#@CHANN 151 0 150 1 +#@CALIB 0 2 0 +@A 789 784 788 814 847 862 880 904 925 955 987 1015 1031 1070 1111 1139 \ +1203 1236 1290 1392 1492 1558 1688 1813 1977 2119 2346 2699 3121 3542 4102 4970 \ +6071 7611 10426 16188 28266 40348 50539 55555 56162 54162 47102 35718 24588 17034 12994 11444 \ +11808 13461 15687 18885 23827 31578 41999 49556 58084 59415 59456 55698 44525 28219 17680 12881 \ +9518 7415 6155 5246 4646 3978 3612 3299 3020 2761 2670 2472 2500 2310 2286 2106 \ +1989 1890 1782 1655 1421 1293 1135 990 879 757 672 618 532 488 445 424 \ +414 373 351 325 307 284 270 247 228 213 199 187 183 176 164 156 \ +153 140 142 130 118 118 103 101 97 86 90 86 87 81 75 82 \ +80 76 77 75 76 77 62 69 74 60 65 68 65 58 63 64 \ +63 59 60 56 57 60 55 + +#S 2 31oct98.dat 22.1 If4 +#D Thu Jul 7 08:40:19 2016 +#C no data cols, one mca analyser, multiple spectra +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#@CTIME 123.4 234.5 345.6 +@A 0 1 2 +@A 10 9 8 +@A 1 1 1.1 +@A 3.1 4 5 +@A 7 6 5 +@A 1 1 1 +@A 6 7.7 8 +@A 4 3 2 +@A 1 1 1 + +#S 3 31oct98.dat 22.1 If4 +#D Thu Jul 7 08:40:19 2016 +#C no data cols, 3 mca analysers, multiple spectra +#@MCADEV 1 +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#@CTIME 123.4 234.5 345.6 +#@MCADEV 2 +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#@CTIME 123.4 234.5 345.6 +#@MCADEV 3 +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#@CTIME 123.4 234.5 345.6 +@A 0 1 2 +@A 10 9 8 +@A 1 
1 1.1 +@A 3.1 4 5 +@A 7 6 5 +@A 1 1 1 +@A 6 7.7 8 +@A 4 3 2 +@A 1 1 1 +""" + + +class TestSpecH5NoDataCols(unittest.TestCase): + """Test reading SPEC files with only MCA data""" + @classmethod + def setUpClass(cls): + fd, cls.fname = tempfile.mkstemp() + if sys.version_info < (3, ): + os.write(fd, sftext_no_cols) + else: + os.write(fd, bytes(sftext_no_cols, 'ascii')) + os.close(fd) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.fname) + + def setUp(self): + self.sfh5 = SpecH5(self.fname) + + def tearDown(self): + self.sfh5.close() + + def testScan1(self): + # 1.1: single analyser, single spectrum, 151 channels + self.assertIn("mca_0", + self.sfh5["1.1/instrument/"]) + self.assertEqual(self.sfh5["1.1/instrument/mca_0/data"].shape, + (1, 151)) + self.assertNotIn("mca_1", + self.sfh5["1.1/instrument/"]) + + def testScan2(self): + # 2.1: single analyser, 9 spectra, 3 channels + self.assertIn("mca_0", + self.sfh5["2.1/instrument/"]) + self.assertEqual(self.sfh5["2.1/instrument/mca_0/data"].shape, + (9, 3)) + self.assertNotIn("mca_1", + self.sfh5["2.1/instrument/"]) + + def testScan3(self): + # 3.1: 3 analysers, 3 spectra/analyser, 3 channels + for i in range(3): + self.assertIn("mca_%d" % i, + self.sfh5["3.1/instrument/"]) + self.assertEqual( + self.sfh5["3.1/instrument/mca_%d/data" % i].shape, + (3, 3)) + + self.assertNotIn("mca_3", + self.sfh5["3.1/instrument/"]) + + +sf_text_slash = r"""#F /data/id09/archive/logspecfiles/laue/2016/scan_231_laue_16-11-29.dat +#D Sat Dec 10 22:20:59 2016 +#O0 Pslit/HGap MRTSlit%UP + +#S 1 laue_16-11-29.log 231.1 PD3/A +#D Sat Dec 10 22:20:59 2016 +#P0 180.005 -0.66875 +#N 2 +#L GONY/mm PD3%A +-2.015 5.250424e-05 +-2.01 5.30798e-05 +-2.005 5.281903e-05 +-2 5.220436e-05 +""" + + +class TestSpecH5SlashInLabels(unittest.TestCase): + """Test reading SPEC files with labels containing a / character + + The / character must be substituted with a % + """ + @classmethod + def setUpClass(cls): + fd, cls.fname = tempfile.mkstemp() + if 
sys.version_info < (3, ): + os.write(fd, sf_text_slash) + else: + os.write(fd, bytes(sf_text_slash, 'ascii')) + os.close(fd) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.fname) + + def setUp(self): + self.sfh5 = SpecH5(self.fname) + + def tearDown(self): + self.sfh5.close() + + def testLabels(self): + """Ensure `/` is substituted with `%` and + ensure legitimate `%` in names are still working""" + self.assertEqual(list(self.sfh5["1.1/measurement/"].keys()), + ["GONY%mm", "PD3%A"]) + + # substituted "%" + self.assertIn("GONY%mm", + self.sfh5["1.1/measurement/"]) + self.assertNotIn("GONY/mm", + self.sfh5["1.1/measurement/"]) + self.assertAlmostEqual(self.sfh5["1.1/measurement/GONY%mm"][0], + -2.015, places=4) + # legitimate "%" + self.assertIn("PD3%A", + self.sfh5["1.1/measurement/"]) + + def testMotors(self): + """Ensure `/` is substituted with `%` and + ensure legitimate `%` in names are still working""" + self.assertEqual(list(self.sfh5["1.1/instrument/positioners"].keys()), + ["Pslit%HGap", "MRTSlit%UP"]) + # substituted "%" + self.assertIn("Pslit%HGap", + self.sfh5["1.1/instrument/positioners"]) + self.assertNotIn("Pslit/HGap", + self.sfh5["1.1/instrument/positioners"]) + self.assertAlmostEqual( + self.sfh5["1.1/instrument/positioners/Pslit%HGap"], + 180.005, places=4) + # legitimate "%" + self.assertIn("MRTSlit%UP", + self.sfh5["1.1/instrument/positioners"]) + + +def testUnitCellUBMatrix(tmp_path): + """Test unit cell (#G1) and UB matrix (#G3)""" + file_path = tmp_path / "spec.dat" + file_path.write_bytes(bytes(""" +#S 1 OK +#G1 0 1 2 3 4 5 +#G3 0 1 2 3 4 5 6 7 8 +""", encoding="ascii")) + with SpecH5(str(file_path)) as spech5: + assert numpy.array_equal( + spech5["/1.1/sample/ub_matrix"], + numpy.arange(9).reshape(1, 3, 3)) + assert numpy.array_equal( + spech5["/1.1/sample/unit_cell"], [[0, 1, 2, 3, 4, 5]]) + assert numpy.array_equal( + spech5["/1.1/sample/unit_cell_abc"], [0, 1, 2]) + assert numpy.array_equal( + 
spech5["/1.1/sample/unit_cell_alphabetagamma"], [3, 4, 5]) + + +def testMalformedUnitCellUBMatrix(tmp_path): + """Test malformed unit cell (#G1) and UB matrix (#G3): 1 value""" + file_path = tmp_path / "spec.dat" + file_path.write_bytes(bytes(""" +#S 1 all malformed=0 +#G1 0 +#G3 0 +""", encoding="ascii")) + with SpecH5(str(file_path)) as spech5: + assert "sample" not in spech5["1.1"] + + +def testMalformedUBMatrix(tmp_path): + """Test malformed UB matrix (#G3): all zeros""" + file_path = tmp_path / "spec.dat" + file_path.write_bytes(bytes(""" +#S 1 G3 all 0 +#G1 0 1 2 3 4 5 +#G3 0 0 0 0 0 0 0 0 0 +""", encoding="ascii")) + with SpecH5(str(file_path)) as spech5: + assert "ub_matrix" not in spech5["/1.1/sample"] + assert numpy.array_equal( + spech5["/1.1/sample/unit_cell"], [[0, 1, 2, 3, 4, 5]]) + assert numpy.array_equal( + spech5["/1.1/sample/unit_cell_abc"], [0, 1, 2]) + assert numpy.array_equal( + spech5["/1.1/sample/unit_cell_alphabetagamma"], [3, 4, 5]) + + +def testMalformedUnitCell(tmp_path): + """Test malformed unit cell (#G1): missing values""" + file_path = tmp_path / "spec.dat" + file_path.write_bytes(bytes(""" +#S 1 G1 malformed missing values +#G1 0 1 2 +#G3 0 1 2 3 4 5 6 7 8 +""", encoding="ascii")) + with SpecH5(str(file_path)) as spech5: + assert "unit_cell" not in spech5["/1.1/sample"] + assert "unit_cell_abc" not in spech5["/1.1/sample"] + assert "unit_cell_alphabetagamma" not in spech5["/1.1/sample"] + assert numpy.array_equal( + spech5["/1.1/sample/ub_matrix"], + numpy.arange(9).reshape(1, 3, 3)) diff --git a/src/silx/io/test/test_spectoh5.py b/src/silx/io/test/test_spectoh5.py new file mode 100644 index 0000000..66bf8d6 --- /dev/null +++ b/src/silx/io/test/test_spectoh5.py @@ -0,0 +1,183 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this 
software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for SpecFile to HDF5 converter""" + +from numpy import array_equal +import os +import sys +import tempfile +import unittest + +import h5py + +from ..spech5 import SpecH5, SpecH5Group +from ..convert import convert, write_to_h5 +from ..utils import h5py_read_dataset + +__authors__ = ["P. 
Knobel"] +__license__ = "MIT" +__date__ = "12/02/2018" + + +sfdata = b"""#F /tmp/sf.dat +#E 1455180875 +#D Thu Feb 11 09:54:35 2016 +#C imaging User = opid17 +#O0 Pslit HGap MRTSlit UP MRTSlit DOWN +#O1 Sslit1 VOff Sslit1 HOff Sslit1 VGap +#o0 pshg mrtu mrtd +#o2 ss1vo ss1ho ss1vg + +#J0 Seconds IA ion.mono Current +#J1 xbpmc2 idgap1 Inorm + +#S 1 ascan ss1vo -4.55687 -0.556875 40 0.2 +#D Thu Feb 11 09:55:20 2016 +#T 0.2 (Seconds) +#P0 180.005 -0.66875 0.87125 +#P1 14.74255 16.197579 12.238283 +#N 4 +#L MRTSlit UP second column 3rd_col +-1.23 5.89 8 +8.478100E+01 5 1.56 +3.14 2.73 -3.14 +1.2 2.3 3.4 + +#S 1 aaaaaa +#D Thu Feb 11 10:00:32 2016 +#@MCADEV 1 +#@MCA %16C +#@CHANN 3 0 2 1 +#@CALIB 1 2 3 +#N 3 +#L uno duo +1 2 +@A 0 1 2 +@A 10 9 8 +3 4 +@A 3.1 4 5 +@A 7 6 5 +5 6 +@A 6 7.7 8 +@A 4 3 2 +""" + + +class TestConvertSpecHDF5(unittest.TestCase): + @classmethod + def setUpClass(cls): + fd, cls.spec_fname = tempfile.mkstemp(prefix="TestConvertSpecHDF5") + os.write(fd, sfdata) + os.close(fd) + + fd, cls.h5_fname = tempfile.mkstemp(prefix="TestConvertSpecHDF5") + # Close and delete (we just need the name) + os.close(fd) + os.unlink(cls.h5_fname) + + @classmethod + def tearDownClass(cls): + os.unlink(cls.spec_fname) + + def setUp(self): + convert(self.spec_fname, self.h5_fname) + + self.sfh5 = SpecH5(self.spec_fname) + self.h5f = h5py.File(self.h5_fname, "a") + + def tearDown(self): + self.h5f.close() + self.sfh5.close() + os.unlink(self.h5_fname) + + def testAppendToHDF5(self): + write_to_h5(self.sfh5, self.h5f, h5path="/foo/bar/spam") + self.assertTrue( + array_equal(self.h5f["/1.2/measurement/mca_1/data"], + self.h5f["/foo/bar/spam/1.2/measurement/mca_1/data"]) + ) + + def testWriteSpecH5Group(self): + """Test passing a SpecH5Group as parameter, instead of a Spec filename + or a SpecH5.""" + g = self.sfh5["1.1/instrument"] + self.assertIsInstance(g, SpecH5Group) # let's be paranoid + write_to_h5(g, self.h5f, h5path="my instruments") + + 
self.assertAlmostEqual(self.h5f["my instruments/positioners/Sslit1 HOff"][tuple()], + 16.197579, places=4) + + def testTitle(self): + """Test the value of a dataset""" + title12 = h5py_read_dataset(self.h5f["/1.2/title"]) + self.assertEqual(title12, + u"aaaaaa") + + def testAttrs(self): + # Test root group (file) attributes + self.assertEqual(self.h5f.attrs["NX_class"], + u"NXroot") + # Test dataset attributes + ds = self.h5f["/1.2/instrument/mca_1/data"] + self.assertTrue("interpretation" in ds.attrs) + self.assertEqual(list(ds.attrs.values()), + [u"spectrum"]) + # Test group attributes + grp = self.h5f["1.1"] + self.assertEqual(grp.attrs["NX_class"], + u"NXentry") + self.assertEqual(len(list(grp.attrs.keys())), + 1) + + def testHdf5HasSameMembers(self): + spec_member_list = [] + + def append_spec_members(name): + spec_member_list.append(name) + self.sfh5.visit(append_spec_members) + + hdf5_member_list = [] + + def append_hdf5_members(name): + hdf5_member_list.append(name) + self.h5f.visit(append_hdf5_members) + + # 1. 
For some reason, h5py visit method doesn't include the leading + # "/" character when it passes the member name to the function, + # even though an explicit the .name attribute of a member will + # have a leading "/" + spec_member_list = [m.lstrip("/") for m in spec_member_list] + + self.assertEqual(set(hdf5_member_list), + set(spec_member_list)) + + def testLinks(self): + self.assertTrue( + array_equal(self.sfh5["/1.2/measurement/mca_0/data"], + self.h5f["/1.2/measurement/mca_0/data"]) + ) + self.assertTrue( + array_equal(self.h5f["/1.2/instrument/mca_1/channels"], + self.h5f["/1.2/measurement/mca_1/info/channels"]) + ) diff --git a/src/silx/io/test/test_url.py b/src/silx/io/test/test_url.py new file mode 100644 index 0000000..7346391 --- /dev/null +++ b/src/silx/io/test/test_url.py @@ -0,0 +1,217 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2017 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for url module""" + +__authors__ = ["V. Valls"] +__license__ = "MIT" +__date__ = "29/01/2018" + + +import unittest +from ..url import DataUrl + + +class TestDataUrl(unittest.TestCase): + + def assertUrl(self, url, expected): + self.assertEqual(url.is_valid(), expected[0]) + self.assertEqual(url.is_absolute(), expected[1]) + self.assertEqual(url.scheme(), expected[2]) + self.assertEqual(url.file_path(), expected[3]) + self.assertEqual(url.data_path(), expected[4]) + self.assertEqual(url.data_slice(), expected[5]) + + def test_fabio_absolute(self): + url = DataUrl("fabio:///data/image.edf?slice=2") + expected = [True, True, "fabio", "/data/image.edf", None, (2, )] + self.assertUrl(url, expected) + + def test_fabio_absolute_windows(self): + url = DataUrl("fabio:///C:/data/image.edf?slice=2") + expected = [True, True, "fabio", "C:/data/image.edf", None, (2, )] + self.assertUrl(url, expected) + + def test_silx_absolute(self): + url = DataUrl("silx:///data/image.h5?path=/data/dataset&slice=1,5") + expected = [True, True, "silx", "/data/image.h5", "/data/dataset", (1, 5)] + self.assertUrl(url, expected) + + def test_commandline_shell_separator(self): + url = DataUrl("silx:///data/image.h5::path=/data/dataset&slice=1,5") + expected = [True, True, "silx", "/data/image.h5", "/data/dataset", (1, 5)] + self.assertUrl(url, expected) + + def test_silx_absolute2(self): + url = DataUrl("silx:///data/image.edf?/scan_0/detector/data") + expected = [True, True, "silx", "/data/image.edf", "/scan_0/detector/data", None] + self.assertUrl(url, expected) + + def test_silx_absolute_windows(self): + url = 
DataUrl("silx:///C:/data/image.h5?/scan_0/detector/data") + expected = [True, True, "silx", "C:/data/image.h5", "/scan_0/detector/data", None] + self.assertUrl(url, expected) + + def test_silx_relative(self): + url = DataUrl("silx:./image.h5") + expected = [True, False, "silx", "./image.h5", None, None] + self.assertUrl(url, expected) + + def test_fabio_relative(self): + url = DataUrl("fabio:./image.edf") + expected = [True, False, "fabio", "./image.edf", None, None] + self.assertUrl(url, expected) + + def test_silx_relative2(self): + url = DataUrl("silx:image.h5") + expected = [True, False, "silx", "image.h5", None, None] + self.assertUrl(url, expected) + + def test_fabio_relative2(self): + url = DataUrl("fabio:image.edf") + expected = [True, False, "fabio", "image.edf", None, None] + self.assertUrl(url, expected) + + def test_file_relative(self): + url = DataUrl("image.edf") + expected = [True, False, None, "image.edf", None, None] + self.assertUrl(url, expected) + + def test_file_relative2(self): + url = DataUrl("./foo/bar/image.edf") + expected = [True, False, None, "./foo/bar/image.edf", None, None] + self.assertUrl(url, expected) + + def test_file_relative3(self): + url = DataUrl("foo/bar/image.edf") + expected = [True, False, None, "foo/bar/image.edf", None, None] + self.assertUrl(url, expected) + + def test_file_absolute(self): + url = DataUrl("/data/image.edf") + expected = [True, True, None, "/data/image.edf", None, None] + self.assertUrl(url, expected) + + def test_file_absolute_windows(self): + url = DataUrl("C:/data/image.edf") + expected = [True, True, None, "C:/data/image.edf", None, None] + self.assertUrl(url, expected) + + def test_absolute_with_path(self): + url = DataUrl("/foo/foobar.h5?/foo/bar") + expected = [True, True, None, "/foo/foobar.h5", "/foo/bar", None] + self.assertUrl(url, expected) + + def test_windows_file_data_slice(self): + url = DataUrl("C:/foo/foobar.h5?path=/foo/bar&slice=5,1") + expected = [True, True, None, 
"C:/foo/foobar.h5", "/foo/bar", (5, 1)] + self.assertUrl(url, expected) + + def test_scheme_file_data_slice(self): + url = DataUrl("silx:/foo/foobar.h5?path=/foo/bar&slice=5,1") + expected = [True, True, "silx", "/foo/foobar.h5", "/foo/bar", (5, 1)] + self.assertUrl(url, expected) + + def test_scheme_windows_file_data_slice(self): + url = DataUrl("silx:C:/foo/foobar.h5?path=/foo/bar&slice=5,1") + expected = [True, True, "silx", "C:/foo/foobar.h5", "/foo/bar", (5, 1)] + self.assertUrl(url, expected) + + def test_empty(self): + url = DataUrl("") + expected = [False, False, None, "", None, None] + self.assertUrl(url, expected) + + def test_unknown_scheme(self): + url = DataUrl("foo:/foo/foobar.h5?path=/foo/bar&slice=5,1") + expected = [False, True, "foo", "/foo/foobar.h5", "/foo/bar", (5, 1)] + self.assertUrl(url, expected) + + def test_slice(self): + url = DataUrl("/a.h5?path=/b&slice=5,1") + expected = [True, True, None, "/a.h5", "/b", (5, 1)] + self.assertUrl(url, expected) + + def test_slice2(self): + url = DataUrl("/a.h5?path=/b&slice=2:5") + expected = [True, True, None, "/a.h5", "/b", (slice(2, 5),)] + self.assertUrl(url, expected) + + def test_slice3(self): + url = DataUrl("/a.h5?path=/b&slice=::2") + expected = [True, True, None, "/a.h5", "/b", (slice(None, None, 2),)] + self.assertUrl(url, expected) + + def test_slice_ellipsis(self): + url = DataUrl("/a.h5?path=/b&slice=...") + expected = [True, True, None, "/a.h5", "/b", (Ellipsis, )] + self.assertUrl(url, expected) + + def test_slice_slicing(self): + url = DataUrl("/a.h5?path=/b&slice=:") + expected = [True, True, None, "/a.h5", "/b", (slice(None), )] + self.assertUrl(url, expected) + + def test_slice_missing_element(self): + url = DataUrl("/a.h5?path=/b&slice=5,,1") + expected = [False, True, None, "/a.h5", "/b", None] + self.assertUrl(url, expected) + + def test_slice_no_elements(self): + url = DataUrl("/a.h5?path=/b&slice=") + expected = [False, True, None, "/a.h5", "/b", None] + self.assertUrl(url, 
expected) + + def test_create_relative_url(self): + url = DataUrl(scheme="silx", file_path="./foo.h5", data_path="/", data_slice=(5, 1)) + self.assertFalse(url.is_absolute()) + url2 = DataUrl(url.path()) + self.assertEqual(url, url2) + + def test_create_absolute_url(self): + url = DataUrl(scheme="silx", file_path="/foo.h5", data_path="/", data_slice=(5, 1)) + url2 = DataUrl(url.path()) + self.assertEqual(url, url2) + + def test_create_absolute_windows_url(self): + url = DataUrl(scheme="silx", file_path="C:/foo.h5", data_path="/", data_slice=(5, 1)) + url2 = DataUrl(url.path()) + self.assertEqual(url, url2) + + def test_create_slice_url(self): + url = DataUrl(scheme="silx", file_path="/foo.h5", data_path="/", data_slice=(5, 1, Ellipsis, slice(None))) + url2 = DataUrl(url.path()) + self.assertEqual(url, url2) + + def test_wrong_url(self): + url = DataUrl(scheme="silx", file_path="/foo.h5", data_slice=(5, 1)) + self.assertFalse(url.is_valid()) + + def test_path_creation(self): + """make sure the construction of path succeed and that we can + recreate a DataUrl from a path""" + for data_slice in (1, (1,)): + with self.subTest(data_slice=data_slice): + url = DataUrl(scheme="silx", file_path="/foo.h5", data_slice=data_slice) + path = url.path() + DataUrl(path=path) diff --git a/src/silx/io/test/test_utils.py b/src/silx/io/test/test_utils.py new file mode 100644 index 0000000..cc34100 --- /dev/null +++ b/src/silx/io/test/test_utils.py @@ -0,0 +1,923 @@ +# coding: utf-8 +# /*########################################################################## +# Copyright (C) 2016-2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit 
persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ############################################################################*/ +"""Tests for utils module""" + +import io +import numpy +import os +import re +import shutil +import tempfile +import unittest +import sys + +from .. import utils +from ..._version import calc_hexversion +import silx.io.url + +import h5py +from ..utils import h5ls +from silx.io import commonh5 + +import fabio + +__authors__ = ["P. 
Knobel"] +__license__ = "MIT" +__date__ = "03/12/2020" + +expected_spec1 = r"""#F .* +#D .* + +#S 1 Ordinate1 +#D .* +#N 2 +#L Abscissa Ordinate1 +1 4\.00 +2 5\.00 +3 6\.00 +""" + +expected_spec2 = expected_spec1 + r""" +#S 2 Ordinate2 +#D .* +#N 2 +#L Abscissa Ordinate2 +1 7\.00 +2 8\.00 +3 9\.00 +""" + +expected_spec2reg = r"""#F .* +#D .* + +#S 1 Ordinate1 +#D .* +#N 3 +#L Abscissa Ordinate1 Ordinate2 +1 4\.00 7\.00 +2 5\.00 8\.00 +3 6\.00 9\.00 +""" + +expected_spec2irr = expected_spec1 + r""" +#S 2 Ordinate2 +#D .* +#N 2 +#L Abscissa Ordinate2 +1 7\.00 +2 8\.00 +""" + +expected_csv = r"""Abscissa;Ordinate1;Ordinate2 +1;4\.00;7\.00e\+00 +2;5\.00;8\.00e\+00 +3;6\.00;9\.00e\+00 +""" + +expected_csv2 = r"""x;y0;y1 +1;4\.00;7\.00e\+00 +2;5\.00;8\.00e\+00 +3;6\.00;9\.00e\+00 +""" + + +class TestSave(unittest.TestCase): + """Test saving curves as SpecFile: + """ + + def setUp(self): + self.tempdir = tempfile.mkdtemp() + self.spec_fname = os.path.join(self.tempdir, "savespec.dat") + self.csv_fname = os.path.join(self.tempdir, "savecsv.csv") + self.npy_fname = os.path.join(self.tempdir, "savenpy.npy") + + self.x = [1, 2, 3] + self.xlab = "Abscissa" + self.y = [[4, 5, 6], [7, 8, 9]] + self.y_irr = [[4, 5, 6], [7, 8]] + self.ylabs = ["Ordinate1", "Ordinate2"] + + def tearDown(self): + if os.path.isfile(self.spec_fname): + os.unlink(self.spec_fname) + if os.path.isfile(self.csv_fname): + os.unlink(self.csv_fname) + if os.path.isfile(self.npy_fname): + os.unlink(self.npy_fname) + shutil.rmtree(self.tempdir) + + def test_save_csv(self): + utils.save1D(self.csv_fname, self.x, self.y, + xlabel=self.xlab, ylabels=self.ylabs, + filetype="csv", fmt=["%d", "%.2f", "%.2e"], + csvdelim=";", autoheader=True) + + csvf = open(self.csv_fname) + actual_csv = csvf.read() + csvf.close() + + self.assertRegex(actual_csv, expected_csv) + + def test_save_npy(self): + """npy file is saved with numpy.save after building a numpy array + and converting it to a named record array""" + npyf = 
open(self.npy_fname, "wb") + utils.save1D(npyf, self.x, self.y, + xlabel=self.xlab, ylabels=self.ylabs) + npyf.close() + + npy_recarray = numpy.load(self.npy_fname) + + self.assertEqual(npy_recarray.shape, (3,)) + self.assertTrue(numpy.array_equal(npy_recarray['Ordinate1'], + numpy.array((4, 5, 6)))) + + def test_savespec_filename(self): + """Save SpecFile using savespec()""" + utils.savespec(self.spec_fname, self.x, self.y[0], xlabel=self.xlab, + ylabel=self.ylabs[0], fmt=["%d", "%.2f"], + close_file=True, scan_number=1) + + specf = open(self.spec_fname) + actual_spec = specf.read() + specf.close() + self.assertRegex(actual_spec, expected_spec1) + + def test_savespec_file_handle(self): + """Save SpecFile using savespec(), passing a file handle""" + # first savespec: open, write file header, save y[0] as scan 1, + # return file handle + specf = utils.savespec(self.spec_fname, self.x, self.y[0], + xlabel=self.xlab, ylabel=self.ylabs[0], + fmt=["%d", "%.2f"], close_file=False) + + # second savespec: save y[1] as scan 2, close file + utils.savespec(specf, self.x, self.y[1], xlabel=self.xlab, + ylabel=self.ylabs[1], fmt=["%d", "%.2f"], + write_file_header=False, close_file=True, + scan_number=2) + + specf = open(self.spec_fname) + actual_spec = specf.read() + specf.close() + self.assertRegex(actual_spec, expected_spec2) + + def test_save_spec_reg(self): + """Save SpecFile using save() on a regular pattern""" + utils.save1D(self.spec_fname, self.x, self.y, xlabel=self.xlab, + ylabels=self.ylabs, filetype="spec", fmt=["%d", "%.2f"]) + + specf = open(self.spec_fname) + actual_spec = specf.read() + specf.close() + + self.assertRegex(actual_spec, expected_spec2reg) + + def test_save_spec_irr(self): + """Save SpecFile using save() on an irregular pattern""" + # invalid test case ?! 
def assert_match_any_string_in_list(test, pattern, list_of_strings):
    """Return True if ``pattern`` matches (via :func:`re.match`) at least
    one string in ``list_of_strings``, else False.

    .. note::
        Despite the ``assert_`` prefix this helper never raises; it only
        returns a boolean.  The ``test`` argument is accepted for signature
        compatibility but is not used.
    """
    return any(re.match(pattern, candidate) for candidate in list_of_strings)
self.h5f["/foo/data"] = [3.14] + self.h5f.close() + + rep = h5ls(self.h5_fname) + lines = rep.split("\n") + + self.assertIn("+foo", lines) + self.assertIn("\t+bar", lines) + + match = r'\t\t<HDF5 dataset "tmp": shape \(3,\), type "<i[48]">' + self.assertMatchAnyStringInList(match, lines) + match = r'\t\t<HDF5 dataset "spam": shape \(2, 2\), type "<i[48]">' + self.assertMatchAnyStringInList(match, lines) + match = r'\t<HDF5 dataset "data": shape \(1,\), type "<f[48]">' + self.assertMatchAnyStringInList(match, lines) + + os.unlink(self.h5_fname) + + # Following test case disabled d/t errors on AppVeyor: + # os.unlink(spec_fname) + # PermissionError: [WinError 32] The process cannot access the file because + # it is being used by another process: 'C:\\...\\savespec.dat' + + # def testSpec(self): + # tempdir = tempfile.mkdtemp() + # spec_fname = os.path.join(tempdir, "savespec.dat") + # + # x = [1, 2, 3] + # xlab = "Abscissa" + # y = [[4, 5, 6], [7, 8, 9]] + # ylabs = ["Ordinate1", "Ordinate2"] + # utils.save1D(spec_fname, x, y, xlabel=xlab, + # ylabels=ylabs, filetype="spec", + # fmt=["%d", "%.2f"]) + # + # rep = h5ls(spec_fname) + # lines = rep.split("\n") + # self.assertIn("+1.1", lines) + # self.assertIn("\t+instrument", lines) + # + # self.assertMatchAnyStringInList( + # r'\t\t\t<SPEC dataset "file_header": shape \(\), type "|S60">', + # lines) + # self.assertMatchAnyStringInList( + # r'\t\t<SPEC dataset "Ordinate1": shape \(3L?,\), type "<f4">', + # lines) + # + # os.unlink(spec_fname) + # shutil.rmtree(tempdir) + + +class TestOpen(unittest.TestCase): + """Test `silx.io.utils.open` function.""" + + @classmethod + def setUpClass(cls): + cls.tmp_directory = tempfile.mkdtemp() + cls.createResources(cls.tmp_directory) + + @classmethod + def createResources(cls, directory): + + cls.h5_filename = os.path.join(directory, "test.h5") + h5 = h5py.File(cls.h5_filename, mode="w") + h5["group/group/dataset"] = 50 + h5.close() + + cls.spec_filename = os.path.join(directory, 
"test.dat") + utils.savespec(cls.spec_filename, [1], [1.1], xlabel="x", ylabel="y", + fmt=["%d", "%.2f"], close_file=True, scan_number=1) + + cls.edf_filename = os.path.join(directory, "test.edf") + header = fabio.fabioimage.OrderedDict() + header["integer"] = "10" + data = numpy.array([[10, 50], [50, 10]]) + fabiofile = fabio.edfimage.EdfImage(data, header) + fabiofile.write(cls.edf_filename) + + cls.txt_filename = os.path.join(directory, "test.txt") + f = io.open(cls.txt_filename, "w+t") + f.write(u"Kikoo") + f.close() + + cls.missing_filename = os.path.join(directory, "test.missing") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.tmp_directory) + + def testH5(self): + f = utils.open(self.h5_filename) + self.assertIsNotNone(f) + self.assertIsInstance(f, h5py.File) + f.close() + + def testH5With(self): + with utils.open(self.h5_filename) as f: + self.assertIsNotNone(f) + self.assertIsInstance(f, h5py.File) + + def testH5_withPath(self): + f = utils.open(self.h5_filename + "::/group/group/dataset") + self.assertIsNotNone(f) + self.assertEqual(f.h5py_class, h5py.Dataset) + self.assertEqual(f[()], 50) + f.close() + + def testH5With_withPath(self): + with utils.open(self.h5_filename + "::/group/group") as f: + self.assertIsNotNone(f) + self.assertEqual(f.h5py_class, h5py.Group) + self.assertIn("dataset", f) + + def testSpec(self): + f = utils.open(self.spec_filename) + self.assertIsNotNone(f) + self.assertEqual(f.h5py_class, h5py.File) + f.close() + + def testSpecWith(self): + with utils.open(self.spec_filename) as f: + self.assertIsNotNone(f) + self.assertEqual(f.h5py_class, h5py.File) + + def testEdf(self): + f = utils.open(self.edf_filename) + self.assertIsNotNone(f) + self.assertEqual(f.h5py_class, h5py.File) + f.close() + + def testEdfWith(self): + with utils.open(self.edf_filename) as f: + self.assertIsNotNone(f) + self.assertEqual(f.h5py_class, h5py.File) + + def testUnsupported(self): + self.assertRaises(IOError, utils.open, self.txt_filename) 
+ + def testNotExists(self): + # load it + self.assertRaises(IOError, utils.open, self.missing_filename) + + def test_silx_scheme(self): + url = silx.io.url.DataUrl(scheme="silx", file_path=self.h5_filename, data_path="/") + with utils.open(url.path()) as f: + self.assertIsNotNone(f) + self.assertTrue(silx.io.utils.is_file(f)) + + def test_fabio_scheme(self): + url = silx.io.url.DataUrl(scheme="fabio", file_path=self.edf_filename) + self.assertRaises(IOError, utils.open, url.path()) + + def test_bad_url(self): + url = silx.io.url.DataUrl(scheme="sil", file_path=self.h5_filename) + self.assertRaises(IOError, utils.open, url.path()) + + def test_sliced_url(self): + url = silx.io.url.DataUrl(file_path=self.h5_filename, data_slice=(5,)) + self.assertRaises(IOError, utils.open, url.path()) + + +class TestNodes(unittest.TestCase): + """Test `silx.io.utils.is_` functions.""" + + def test_real_h5py_objects(self): + name = tempfile.mktemp(suffix=".h5") + try: + with h5py.File(name, "w") as h5file: + h5group = h5file.create_group("arrays") + h5dataset = h5group.create_dataset("scalar", data=10) + + self.assertTrue(utils.is_file(h5file)) + self.assertTrue(utils.is_group(h5file)) + self.assertFalse(utils.is_dataset(h5file)) + + self.assertFalse(utils.is_file(h5group)) + self.assertTrue(utils.is_group(h5group)) + self.assertFalse(utils.is_dataset(h5group)) + + self.assertFalse(utils.is_file(h5dataset)) + self.assertFalse(utils.is_group(h5dataset)) + self.assertTrue(utils.is_dataset(h5dataset)) + finally: + os.unlink(name) + + def test_h5py_like_file(self): + + class Foo(object): + + def __init__(self): + self.h5_class = utils.H5Type.FILE + + obj = Foo() + self.assertTrue(utils.is_file(obj)) + self.assertTrue(utils.is_group(obj)) + self.assertFalse(utils.is_dataset(obj)) + + def test_h5py_like_group(self): + + class Foo(object): + + def __init__(self): + self.h5_class = utils.H5Type.GROUP + + obj = Foo() + self.assertFalse(utils.is_file(obj)) + 
self.assertTrue(utils.is_group(obj)) + self.assertFalse(utils.is_dataset(obj)) + + def test_h5py_like_dataset(self): + + class Foo(object): + + def __init__(self): + self.h5_class = utils.H5Type.DATASET + + obj = Foo() + self.assertFalse(utils.is_file(obj)) + self.assertFalse(utils.is_group(obj)) + self.assertTrue(utils.is_dataset(obj)) + + def test_bad(self): + + class Foo(object): + + def __init__(self): + pass + + obj = Foo() + self.assertFalse(utils.is_file(obj)) + self.assertFalse(utils.is_group(obj)) + self.assertFalse(utils.is_dataset(obj)) + + def test_bad_api(self): + + class Foo(object): + + def __init__(self): + self.h5_class = int + + obj = Foo() + self.assertFalse(utils.is_file(obj)) + self.assertFalse(utils.is_group(obj)) + self.assertFalse(utils.is_dataset(obj)) + + +class TestGetData(unittest.TestCase): + """Test `silx.io.utils.get_data` function.""" + + @classmethod + def setUpClass(cls): + cls.tmp_directory = tempfile.mkdtemp() + cls.createResources(cls.tmp_directory) + + @classmethod + def createResources(cls, directory): + + cls.h5_filename = os.path.join(directory, "test.h5") + h5 = h5py.File(cls.h5_filename, mode="w") + h5["group/group/scalar"] = 50 + h5["group/group/array"] = [1, 2, 3, 4, 5] + h5["group/group/array2d"] = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]] + h5.close() + + cls.spec_filename = os.path.join(directory, "test.dat") + utils.savespec(cls.spec_filename, [1], [1.1], xlabel="x", ylabel="y", + fmt=["%d", "%.2f"], close_file=True, scan_number=1) + + cls.edf_filename = os.path.join(directory, "test.edf") + cls.edf_multiframe_filename = os.path.join(directory, "test_multi.edf") + header = fabio.fabioimage.OrderedDict() + header["integer"] = "10" + data = numpy.array([[10, 50], [50, 10]]) + fabiofile = fabio.edfimage.EdfImage(data, header) + fabiofile.write(cls.edf_filename) + fabiofile.append_frame(data=data, header=header) + fabiofile.write(cls.edf_multiframe_filename) + + cls.txt_filename = os.path.join(directory, "test.txt") + f = 
io.open(cls.txt_filename, "w+t") + f.write(u"Kikoo") + f.close() + + cls.missing_filename = os.path.join(directory, "test.missing") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.tmp_directory) + + def test_hdf5_scalar(self): + url = "silx:%s?/group/group/scalar" % self.h5_filename + data = utils.get_data(url=url) + self.assertEqual(data, 50) + + def test_hdf5_array(self): + url = "silx:%s?/group/group/array" % self.h5_filename + data = utils.get_data(url=url) + self.assertEqual(data.shape, (5,)) + self.assertEqual(data[0], 1) + + def test_hdf5_array_slice(self): + url = "silx:%s?path=/group/group/array2d&slice=1" % self.h5_filename + data = utils.get_data(url=url) + self.assertEqual(data.shape, (5,)) + self.assertEqual(data[0], 6) + + def test_hdf5_array_slice_out_of_range(self): + url = "silx:%s?path=/group/group/array2d&slice=5" % self.h5_filename + # ValueError: h5py 2.x + # IndexError: h5py 3.x + self.assertRaises((ValueError, IndexError), utils.get_data, url) + + def test_edf_using_silx(self): + url = "silx:%s?/scan_0/instrument/detector_0/data" % self.edf_filename + data = utils.get_data(url=url) + self.assertEqual(data.shape, (2, 2)) + self.assertEqual(data[0, 0], 10) + + def test_fabio_frame(self): + url = "fabio:%s?slice=1" % self.edf_multiframe_filename + data = utils.get_data(url=url) + self.assertEqual(data.shape, (2, 2)) + self.assertEqual(data[0, 0], 10) + + def test_fabio_singleframe(self): + url = "fabio:%s?slice=0" % self.edf_filename + data = utils.get_data(url=url) + self.assertEqual(data.shape, (2, 2)) + self.assertEqual(data[0, 0], 10) + + def test_fabio_too_much_frames(self): + url = "fabio:%s?slice=..." 
def _h5_py_version_older_than(version):
    """Return True if the installed h5py version is AT LEAST ``version``.

    .. warning::
        The name is misleading (kept for backward compatibility with the
        ``skipUnless`` decorator that references it): the comparison is
        ``installed >= required``, i.e. it answers "is h5py NOT older
        than ``version``", matching its use to enable tests that need a
        minimum h5py version.

    :param str version: Required version as a dotted string, e.g. ``'2.9.0'``.
    :rtype: bool
    """
    # Tuple comparison of the first three integer components is equivalent
    # to the previous calc_hexversion() round-trip for 3-part versions.
    installed = tuple(int(i) for i in h5py.version.version.split('.')[:3])
    required = tuple(int(i) for i in version.split('.'))
    return installed >= required
% self._dataset_shape[2]) + _fi.write('NUM_Y = %s\n' % self._dataset_shape[1]) + _fi.write('NUM_Z = %s\n' % self._dataset_shape[0]) + + def tearDown(self): + shutil.rmtree(self.tempdir) + + def check_dataset(self, h5_file, data_path, shape): + """Make sure the external dataset is valid""" + with h5py.File(h5_file, 'r') as _file: + return data_path in _file and _file[data_path].shape == shape + + def test_h5_file_not_existing(self): + """Test that can create a file with external dataset from scratch""" + utils.rawfile_to_h5_external_dataset(bin_file=self._vol_file, + output_url=self._data_url, + shape=(100, 20, 5), + dtype=numpy.float32) + self.assertTrue(self.check_dataset(h5_file=self.h5_file, + data_path=self.external_dataset_path, + shape=self._dataset_shape)) + os.remove(self.h5_file) + utils.vol_to_h5_external_dataset(vol_file=self._vol_file, + output_url=self._data_url, + info_file=self._file_info) + self.assertTrue(self.check_dataset(h5_file=self.h5_file, + data_path=self.external_dataset_path, + shape=self._dataset_shape)) + + def test_h5_file_existing(self): + """Test that can add the external dataset from an existing file""" + with h5py.File(self.h5_file, 'w') as _file: + _file['/root/dataset1'] = numpy.zeros((100, 100)) + _file['/root/group/dataset2'] = numpy.ones((100, 100)) + utils.rawfile_to_h5_external_dataset(bin_file=self._vol_file, + output_url=self._data_url, + shape=(100, 20, 5), + dtype=numpy.float32) + self.assertTrue(self.check_dataset(h5_file=self.h5_file, + data_path=self.external_dataset_path, + shape=self._dataset_shape)) + + def test_vol_file_not_existing(self): + """Make sure error is raised if .vol file does not exists""" + os.remove(self._vol_file) + utils.rawfile_to_h5_external_dataset(bin_file=self._vol_file, + output_url=self._data_url, + shape=(100, 20, 5), + dtype=numpy.float32) + + self.assertTrue(self.check_dataset(h5_file=self.h5_file, + data_path=self.external_dataset_path, + shape=self._dataset_shape)) + + def 
test_conflicts(self):
        """Test several conflict cases"""
        # test if path already exists
        utils.rawfile_to_h5_external_dataset(bin_file=self._vol_file,
                                             output_url=self._data_url,
                                             shape=(100, 20, 5),
                                             dtype=numpy.float32)
        # creating the same external dataset again without overwrite must fail
        with self.assertRaises(ValueError):
            utils.rawfile_to_h5_external_dataset(bin_file=self._vol_file,
                                                 output_url=self._data_url,
                                                 shape=(100, 20, 5),
                                                 overwrite=False,
                                                 dtype=numpy.float32)

        # overwrite=True must succeed and leave a valid dataset behind
        utils.rawfile_to_h5_external_dataset(bin_file=self._vol_file,
                                             output_url=self._data_url,
                                             shape=(100, 20, 5),
                                             overwrite=True,
                                             dtype=numpy.float32)

        self.assertTrue(self.check_dataset(h5_file=self.h5_file,
                                           data_path=self.external_dataset_path,
                                           shape=self._dataset_shape))


class TestH5Strings(unittest.TestCase):
    """Test HDF5 str and bytes writing and reading"""

    @classmethod
    def setUpClass(cls):
        cls.tempdir = tempfile.mkdtemp()
        # variable-length string/bytes HDF5 dtypes used by _make_array
        cls.vlenstr = h5py.special_dtype(vlen=str)
        cls.vlenbytes = h5py.special_dtype(vlen=bytes)
        try:
            # Python 2: the builtin unicode type
            cls.unicode = unicode
        except NameError:
            # Python 3: str is the unicode type
            cls.unicode = str

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(cls.tempdir)

    def setUp(self):
        # fresh HDF5 file per test
        self.file = h5py.File(os.path.join(self.tempdir, 'file.h5'), mode="w")

    def tearDown(self):
        self.file.close()

    @classmethod
    def _make_array(cls, value, n):
        """Return an array of `n` copies of `value` with the matching
        variable-length dtype for str/bytes values."""
        if isinstance(value, bytes):
            dtype = cls.vlenbytes
        elif isinstance(value, cls.unicode):
            dtype = cls.vlenstr
        else:
            return numpy.array([value] * n)
        return numpy.array([value] * n, dtype=dtype)

    @classmethod
    def _get_charset(cls, value):
        """Return the HDF5 character set expected for `value`,
        or None for non-string values."""
        if isinstance(value, bytes):
            return h5py.h5t.CSET_ASCII
        elif isinstance(value, cls.unicode):
            return h5py.h5t.CSET_UTF8
        else:
            return None

    def _check_dataset(self, value, result=None):
        """Write `value` as a scalar and as a vlen array dataset, read it
        back with utils.h5py_read_dataset and compare with `result`
        (defaults to `value` when no ASCII decoding is requested)."""
        # Write+read scalar
        if result:
            decode_ascii = True
        else:
            decode_ascii = False
            result = value
        charset = self._get_charset(value)
        self.file["data"] = value
        data = utils.h5py_read_dataset(self.file["data"],
                                       decode_ascii=decode_ascii)
        assert type(data) == type(result), data
        assert data == result, data
        if charset:
            assert self.file["data"].id.get_type().get_cset() == charset

        # Write+read variable length
        self.file["vlen_data"] = self._make_array(value, 2)
        data = utils.h5py_read_dataset(self.file["vlen_data"], decode_ascii=decode_ascii, index=0)
        assert type(data) == type(result), data
        assert data == result, data
        data = utils.h5py_read_dataset(self.file["vlen_data"], decode_ascii=decode_ascii)
        numpy.testing.assert_array_equal(data, [result] * 2)
        if charset:
            assert self.file["vlen_data"].id.get_type().get_cset() == charset

    def _check_attribute(self, value, result=None):
        """Same as _check_dataset but for attributes, exercising
        h5py_read_attribute and h5py_read_attributes."""
        if result:
            decode_ascii = True
        else:
            decode_ascii = False
            result = value
        self.file.attrs["data"] = value
        data = utils.h5py_read_attribute(self.file.attrs, "data", decode_ascii=decode_ascii)
        assert type(data) == type(result), data
        assert data == result, data

        self.file.attrs["vlen_data"] = self._make_array(value, 2)
        data = utils.h5py_read_attribute(self.file.attrs, "vlen_data", decode_ascii=decode_ascii)
        assert type(data[0]) == type(result), data[0]
        assert data[0] == result, data[0]
        numpy.testing.assert_array_equal(data, [result] * 2)

        data = utils.h5py_read_attributes(self.file.attrs, decode_ascii=decode_ascii)["vlen_data"]
        assert type(data[0]) == type(result), data[0]
        assert data[0] == result, data[0]
        numpy.testing.assert_array_equal(data, [result] * 2)

    def test_dataset_ascii_bytes(self):
        self._check_dataset(b"abc")

    def test_attribute_ascii_bytes(self):
        self._check_attribute(b"abc")

    def test_dataset_ascii_bytes_decode(self):
        self._check_dataset(b"abc", result="abc")

    def test_attribute_ascii_bytes_decode(self):
        self._check_attribute(b"abc", result="abc")

    def test_dataset_ascii_str(self):
        self._check_dataset("abc")

    def test_attribute_ascii_str(self):
        self._check_attribute("abc")

    def test_dataset_utf8_str(self):
        self._check_dataset("\u0101bc")

    def test_attribute_utf8_str(self):
        self._check_attribute("\u0101bc")

    def test_dataset_utf8_bytes(self):
        # 0xC481 is the byte representation of U+0101
        self._check_dataset(b"\xc4\x81bc")

    def test_attribute_utf8_bytes(self):
        # 0xC481 is the byte representation of U+0101
        self._check_attribute(b"\xc4\x81bc")

    def test_dataset_utf8_bytes_decode(self):
        # 0xC481 is the byte representation of U+0101
        self._check_dataset(b"\xc4\x81bc", result="\u0101bc")

    def test_attribute_utf8_bytes_decode(self):
        # 0xC481 is the byte representation of U+0101
        self._check_attribute(b"\xc4\x81bc", result="\u0101bc")

    def test_dataset_latin1_bytes(self):
        # extended ascii character 0xE4
        self._check_dataset(b"\xe423")

    def test_attribute_latin1_bytes(self):
        # extended ascii character 0xE4
        self._check_attribute(b"\xe423")

    def test_dataset_latin1_bytes_decode(self):
        # U+DCE4: surrogate for extended ascii character 0xE4
        self._check_dataset(b"\xe423", result="\udce423")

    def test_attribute_latin1_bytes_decode(self):
        # U+DCE4: surrogate for extended ascii character 0xE4
        self._check_attribute(b"\xe423", result="\udce423")

    def test_dataset_no_string(self):
        self._check_dataset(numpy.int64(10))

    def test_attribute_no_string(self):
        self._check_attribute(numpy.int64(10))


def test_visitall_hdf5(tmp_path):
    """visit HDF5 file content not following links"""
    external_filepath = tmp_path / "external.h5"
    with h5py.File(external_filepath, mode="w") as h5file:
        h5file["target/dataset"] = 50

    filepath = tmp_path / "base.h5"
    with h5py.File(filepath, mode="w") as h5file:
        h5file["group/dataset"] = 50
        h5file["link/soft_link"] = h5py.SoftLink("/group/dataset")
        h5file["link/external_link"] = h5py.ExternalLink("external.h5", "/target/dataset")

    with h5py.File(filepath, mode="r") as h5file:
        visited_items = {}
        for path, item in utils.visitall(h5file):
            if isinstance(item, h5py.Dataset):
                content = item[()]
            elif isinstance(item, h5py.Group):
                content = None
            elif isinstance(item, h5py.SoftLink):
                content = item.path
            elif isinstance(item, h5py.ExternalLink):
                content = item.filename, item.path
            else:
                raise AssertionError("Item should not be present: %s" % path)
            visited_items[path] = (item.__class__, content)

    # links must be returned as link objects, not dereferenced
    assert visited_items == {
        "/group": (h5py.Group, None),
        "/group/dataset": (h5py.Dataset, 50),
        "/link": (h5py.Group, None),
        "/link/soft_link": (h5py.SoftLink, "/group/dataset"),
        "/link/external_link": (h5py.ExternalLink, ("external.h5", "/target/dataset")),
    }


def test_visitall_commonh5():
    """Visit commonh5 File object"""
    fobj = commonh5.File("filename.file", mode="w")
    group = fobj.create_group("group")
    dataset = group.create_dataset("dataset", data=numpy.array(50))
    group["soft_link"] = dataset  # Create softlink

    visited_items = dict(utils.visitall(fobj))
    assert len(visited_items) == 3
    assert visited_items["/group"] is group
    assert visited_items["/group/dataset"] is dataset
    soft_link = visited_items["/group/soft_link"]
    assert isinstance(soft_link, commonh5.SoftLink)
    assert soft_link.path == "/group/dataset"
diff --git a/src/silx/io/test/test_write_to_h5.py b/src/silx/io/test/test_write_to_h5.py
new file mode 100644
index 0000000..06149c9
--- /dev/null
+++ b/src/silx/io/test/test_write_to_h5.py
@@ -0,0 +1,118 @@
# coding: utf-8
# /*##########################################################################
# Copyright (C) 2021 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# 
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""Test silx.io.convert.write_to_h5"""


import h5py
import numpy
from silx.io import spech5

from silx.io.convert import write_to_h5
from silx.io.dictdump import h5todict
from silx.io import commonh5
from silx.io.spech5 import SpecH5


def test_with_commonh5(tmp_path):
    """Test write_to_h5 with commonh5 input"""
    fobj = commonh5.File("filename.txt", mode="w")
    group = fobj.create_group("group")
    dataset = group.create_dataset("dataset", data=numpy.array(50))
    group["soft_link"] = dataset  # Create softlink

    output_filepath = tmp_path / "output.h5"
    write_to_h5(fobj, str(output_filepath))

    assert h5todict(str(output_filepath)) == {
        'group': {'dataset': numpy.array(50), 'soft_link': numpy.array(50)},
    }
    # The soft link must be written as an actual HDF5 soft link
    with h5py.File(output_filepath, mode="r") as h5file:
        soft_link = h5file.get("/group/soft_link", getlink=True)
        assert isinstance(soft_link, h5py.SoftLink)
        assert soft_link.path == "/group/dataset"


def test_with_hdf5(tmp_path):
    """Test write_to_h5 with HDF5 file input"""
    filepath = tmp_path / "base.h5"
    with h5py.File(filepath, mode="w") as h5file:
        h5file["group/dataset"] = 50
        h5file["group/soft_link"] = h5py.SoftLink("/group/dataset")
        h5file["group/external_link"] = h5py.ExternalLink("base.h5", "/group/dataset")

    output_filepath = tmp_path / "output.h5"
    write_to_h5(str(filepath), str(output_filepath))
    # external links are not copied; soft links are dereferenced by h5todict
    assert h5todict(str(output_filepath)) == {
        'group': {'dataset': 50, 'soft_link': 50},
    }
    with h5py.File(output_filepath, mode="r") as h5file:
        soft_link = h5file.get("group/soft_link", getlink=True)
        assert isinstance(soft_link, h5py.SoftLink)
        assert soft_link.path == "/group/dataset"


def test_with_spech5(tmp_path):
    """Test write_to_h5 with SpecH5 input"""
    filepath = tmp_path / "file.spec"
    filepath.write_bytes(
        bytes(
"""#F /tmp/sf.dat

#S 1 cmd
#L a b
1 2
""",
            encoding='ascii')
    )

    output_filepath = tmp_path / "output.h5"
    with spech5.SpecH5(str(filepath)) as spech5file:
        write_to_h5(spech5file, str(output_filepath))
    # FIX: removed a leftover debug print of the converted file content

    def assert_equal(item1, item2):
        # Recursively compare the h5todict result against the expected dict
        if isinstance(item1, dict):
            assert tuple(item1.keys()) == tuple(item2.keys())
            for key in item1.keys():
                assert_equal(item1[key], item2[key])
        else:
            # FIX: numpy.array_equal returns a bool that was previously
            # discarded, so leaf values were never actually checked.
            numpy.testing.assert_array_equal(item1, item2)

    assert_equal(h5todict(str(output_filepath)), {
        '1.1': {
            'instrument': {
                'positioners': {},
                'specfile': {
                    'file_header': ['#F /tmp/sf.dat'],
                    'scan_header': ['#S 1 cmd', '#L a b'],
                },
            },
            'measurement': {
                'a': [1.],
                'b': [2.],
            },
            'start_time': '',
            'title': 'cmd',
        },
    })
"""URL module"""

__authors__ = ["V. Valls"]
__license__ = "MIT"
__date__ = "29/01/2018"

import logging
from collections.abc import Iterable
import urllib.parse


_logger = logging.getLogger(__name__)


class DataUrl(object):
    """Non-mutable object to parse a string representing a resource data
    locator.

    It supports:

    - path to file and path inside file to the data
    - data slicing
    - fabio or silx access to the data
    - absolute and relative file access

    >>> # fabio access using absolute path
    >>> DataUrl("fabio:///data/image.edf?slice=2")
    >>> DataUrl("fabio:///C:/data/image.edf?slice=2")

    >>> # silx access using absolute path
    >>> DataUrl("silx:///data/image.h5?path=/data/dataset&slice=1,5")
    >>> DataUrl("silx:///data/image.edf?path=/scan_0/detector/data")
    >>> DataUrl("silx:///C:/data/image.edf?path=/scan_0/detector/data")

    >>> # `path=` can be omitted if there is no other query keys
    >>> DataUrl("silx:///data/image.h5?/data/dataset")
    >>> # is the same as
    >>> DataUrl("silx:///data/image.h5?path=/data/dataset")

    >>> # `::` can be used instead of `?` which can be useful with shell in
    >>> # command lines
    >>> DataUrl("silx:///data/image.h5::/data/dataset")
    >>> # is the same as
    >>> DataUrl("silx:///data/image.h5?/data/dataset")

    >>> # Relative path access
    >>> DataUrl("silx:./image.h5")
    >>> DataUrl("fabio:./image.edf")
    >>> DataUrl("silx:image.h5")
    >>> DataUrl("fabio:image.edf")

    >>> # It also supports parsing of file access for convenience
    >>> DataUrl("./foo/bar/image.edf")
    >>> DataUrl("C:/data/")

    :param str path: Path representing a link to a data. If specified, other
        arguments are not used.
    :param str file_path: Link to the file containing the data.
        None if there is no data selection.
    :param str data_path: Data selection applied to the data file selected.
        None if there is no data selection.
    :param Tuple[int,slice,Ellipsis] data_slice: Slicing applied to the
        selected data. None if no slicing applied.
    :param Union[str,None] scheme: Scheme of the URL. "silx", "fabio"
        is supported. Other strings can be provided, but :meth:`is_valid` will
        be false.
    """
    def __init__(self, path=None, file_path=None, data_path=None, data_slice=None, scheme=None):
        self.__is_valid = False
        if path is not None:
            # `path` is exclusive with the unpacked arguments
            assert file_path is None
            assert data_path is None
            assert data_slice is None
            assert scheme is None
            self.__parse_from_path(path)
        else:
            self.__file_path = file_path
            self.__data_path = data_path
            self.__data_slice = data_slice
            self.__scheme = scheme
            self.__path = None
            self.__check_validity()

    def __eq__(self, other):
        if not isinstance(other, DataUrl):
            return False
        if self.is_valid() != other.is_valid():
            return False
        if self.is_valid():
            # valid URLs compare by components
            if self.__scheme != other.scheme():
                return False
            if self.__file_path != other.file_path():
                return False
            if self.__data_path != other.data_path():
                return False
            if self.__data_slice != other.data_slice():
                return False
            return True
        else:
            # invalid URLs compare by raw string
            return self.__path == other.path()

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return str(self)

    def __str__(self):
        if self.is_valid() or self.__path is None:
            def quote_string(string):
                if isinstance(string, str):
                    return "'%s'" % string
                else:
                    return string

            template = "DataUrl(valid=%s, scheme=%s, file_path=%s, data_path=%s, data_slice=%s)"
            return template % (self.__is_valid,
                               quote_string(self.__scheme),
                               quote_string(self.__file_path),
                               quote_string(self.__data_path),
                               self.__data_slice)
        else:
            template = "DataUrl(valid=%s, string=%s)"
            return template % (self.__is_valid, self.__path)

    def __check_validity(self):
        """Check the validity of the attributes."""
        if self.__file_path in [None, ""]:
            self.__is_valid = False
            return

        if self.__scheme is None:
            self.__is_valid = True
        elif self.__scheme == "fabio":
            # fabio URLs do not support a data path
            self.__is_valid = self.__data_path is None
        elif self.__scheme == "silx":
            # If there is a slice you must have a data path
            # But you can have a data path without slice
            slice_implies_data = (self.__data_path is None and self.__data_slice is None) or self.__data_path is not None
            self.__is_valid = slice_implies_data
        else:
            self.__is_valid = False

    @staticmethod
    def _parse_slice(slice_string):
        """Parse a slicing sequence and return an associated tuple.

        It supports a sequence of `...`, `:`, and integers separated by a
        comma.

        :rtype: tuple
        :raises ValueError: If the sequence is empty or contains an
            unsupported token.
        """
        def str_to_slice(string):
            if string == "...":
                return Ellipsis
            elif ':' in string:
                if string == ":":
                    return slice(None)
                else:
                    def get_value(my_str):
                        if my_str in ('', None):
                            return None
                        else:
                            return int(my_str)
                    sss = string.split(':')
                    start = get_value(sss[0])
                    stop = get_value(sss[1] if len(sss) > 1 else None)
                    step = get_value(sss[2] if len(sss) > 2 else None)
                    return slice(start, stop, step)
            else:
                return int(string)

        if slice_string == "":
            raise ValueError("An empty slice is not valid")

        tokens = slice_string.split(",")
        data_slice = []
        for t in tokens:
            try:
                data_slice.append(str_to_slice(t))
            except ValueError:
                raise ValueError("'%s' is not a valid slicing" % t)
        return tuple(data_slice)

    def __parse_from_path(self, path):
        """Parse the path and initialize attributes.

        :param str path: Path representing the URL.
        """
        self.__path = path
        # only replace if ? not here already. Otherwise can mess with
        # data_slice if == ::2 for example
        if '?' not in path:
            path = path.replace("::", "?", 1)
        url = urllib.parse.urlparse(path)

        is_valid = True

        if len(url.scheme) <= 2:
            # a 1-2 letter "scheme" is assumed to be a Windows drive letter
            scheme = None
            pos = self.__path.index(url.path)
            file_path = self.__path[0:pos] + url.path
        else:
            scheme = url.scheme if url.scheme != "" else None
            file_path = url.path

        # Check absolute windows path
        if len(file_path) > 2 and file_path[0] == '/':
            if file_path[1] == ":" or file_path[2] == ":":
                file_path = file_path[1:]

        self.__scheme = scheme
        self.__file_path = file_path

        query = urllib.parse.parse_qsl(url.query, keep_blank_values=True)
        if len(query) == 1 and query[0][1] == "":
            # there is no query keys
            data_path = query[0][0]
            data_slice = None
        else:
            merged_query = {}
            for name, value in query:
                # FIX: membership must be tested against merged_query (the
                # dict being built). The previous test `name in query`
                # checked a list of (name, value) tuples and never matched,
                # so duplicated keys were silently collapsed and the
                # "more than one query key" warning below was dead code.
                if name in merged_query:
                    merged_query[name].append(value)
                else:
                    merged_query[name] = [value]

            def pop_single_value(merged_query, name):
                if name in merged_query:
                    values = merged_query.pop(name)
                    if len(values) > 1:
                        _logger.warning("More than one query key named '%s'. The last one is used.", name)
                    value = values[-1]
                else:
                    value = None
                return value

            data_path = pop_single_value(merged_query, "path")
            data_slice = pop_single_value(merged_query, "slice")
            if data_slice is not None:
                try:
                    data_slice = self._parse_slice(data_slice)
                except ValueError:
                    is_valid = False
                    data_slice = None

            for key in merged_query.keys():
                _logger.warning("Query key %s unsupported. Key skipped.", key)

        self.__data_path = data_path
        self.__data_slice = data_slice

        if is_valid:
            self.__check_validity()
        else:
            self.__is_valid = False

    def is_valid(self):
        """Returns true if the URL is valid. Else attributes can be None.

        :rtype: bool
        """
        return self.__is_valid

    def path(self):
        """Returns the string representing the URL.

        :rtype: str
        """
        if self.__path is not None:
            return self.__path

        def slice_to_string(data_slice):
            if data_slice == Ellipsis:
                return "..."
            elif data_slice == slice(None):
                return ":"
            elif isinstance(data_slice, int):
                return str(data_slice)
            else:
                raise TypeError("Unexpected slicing type. Found %s" % type(data_slice))

        if self.__data_path is not None and self.__data_slice is None:
            # single data path: the bare "?<path>" form is enough
            query = self.__data_path
        else:
            queries = []
            if self.__data_path is not None:
                queries.append("path=" + self.__data_path)
            if self.__data_slice is not None:
                if isinstance(self.__data_slice, Iterable):
                    data_slice = ",".join([slice_to_string(s) for s in self.__data_slice])
                else:
                    data_slice = slice_to_string(self.__data_slice)
                queries.append("slice=" + data_slice)
            query = "&".join(queries)

        path = ""
        if self.__file_path is not None:
            path += self.__file_path

        if query != "":
            path = path + "?" + query

        if self.__scheme is not None:
            if self.is_absolute():
                if path.startswith("/"):
                    path = self.__scheme + "://" + path
                else:
                    path = self.__scheme + ":///" + path
            else:
                path = self.__scheme + ":" + path

        return path

    def is_absolute(self):
        """Returns true if the file path is an absolute path.

        :rtype: bool
        """
        file_path = self.file_path()
        if file_path is None:
            return False
        if len(file_path) > 0:
            if file_path[0] == "/":
                return True
        if len(file_path) > 2:
            # Windows
            if file_path[1] == ":" or file_path[2] == ":":
                return True
        elif len(file_path) > 1:
            # Windows
            if file_path[1] == ":":
                return True
        return False

    def file_path(self):
        """Returns the path to the file containing the data.

        :rtype: str
        """
        return self.__file_path

    def data_path(self):
        """Returns the path inside the file to the data.

        :rtype: str
        """
        return self.__data_path

    def data_slice(self):
        """Returns the slicing applied to the data.

        It is a tuple containing numbers, slice or ellipses.

        :rtype: Tuple[int, slice, Ellipsis]
        """
        return self.__data_slice

    def scheme(self):
        """Returns the scheme. It can be None if no scheme is specified.

        :rtype: Union[str, None]
        """
        return self.__scheme
Valls"]
__license__ = "MIT"
__date__ = "03/12/2020"

import enum
import os.path
import sys
import time
import logging
import collections
import urllib.parse

import numpy

from silx.utils.proxy import Proxy
import silx.io.url
from .._version import calc_hexversion

import h5py
import h5py.h5t
import h5py.h5a

try:
    # h5pyd is an optional dependency (HDF5 REST/cloud access);
    # features relying on it are disabled when it is absent
    import h5pyd
except ImportError as e:
    h5pyd = None

logger = logging.getLogger(__name__)

NEXUS_HDF5_EXT = [".h5", ".nx5", ".nxs", ".hdf", ".hdf5", ".cxi"]
"""List of possible extensions for HDF5 file formats."""


class H5Type(enum.Enum):
    """Identify a set of HDF5 concepts"""
    DATASET = 1
    GROUP = 2
    FILE = 3
    SOFT_LINK = 4
    EXTERNAL_LINK = 5
    HARD_LINK = 6


_CLASSES_TYPE = None
"""Store mapping between classes and types"""

# tuple of text string types (Python 2 compatibility residue)
string_types = (basestring,) if sys.version_info[0] == 2 else (str,)  # noqa

# keep a reference to the builtin open(), which is shadowed by the
# module-level `open` function defined later in this file
builtin_open = open


def supported_extensions(flat_formats=True):
    """Returns the list file extensions supported by `silx.open`.

    The result filter out formats when the expected module is not available.

    :param bool flat_formats: If true, also include flat formats like npy or
        edf (while the expected module is available)
    :returns: A dictionary indexed by file description and containing a set of
        extensions (an extension is a string like "\\*.ext").
def supported_extensions(flat_formats=True):
    """Returns the list of file extensions supported by `silx.open`.

    The result filters out formats when the expected module is not available.

    :param bool flat_formats: If true, also include flat formats like npy or
        edf (while the expected module is available)
    :returns: A dictionary indexed by file description and containing a set of
        extensions (an extension is a string like "\\*.ext").
    :rtype: Dict[str, Set[str]]
    """
    formats = collections.OrderedDict()
    formats["HDF5 files"] = set(["*.h5", "*.hdf", "*.hdf5"])
    formats["NeXus files"] = set(["*.nx", "*.nxs", "*.h5", "*.hdf", "*.hdf5"])
    formats["NeXus layout from spec files"] = set(["*.dat", "*.spec", "*.mca"])
    if flat_formats:
        try:
            from silx.io import fabioh5
        except ImportError:
            # fabio is optional: silently skip its formats
            fabioh5 = None
        if fabioh5 is not None:
            formats["NeXus layout from fabio files"] = set(fabioh5.supported_extensions())

    extensions = ["*.npz"]
    if flat_formats:
        extensions.append("*.npy")

    formats["Numpy binary files"] = set(extensions)
    formats["Coherent X-Ray Imaging files"] = set(["*.cxi"])
    formats["FIO files"] = set(["*.fio"])
    return formats


def save1D(fname, x, y, xlabel=None, ylabels=None, filetype=None,
           fmt="%.7g", csvdelim=";", newline="\n", header="",
           footer="", comments="#", autoheader=False):
    """Saves any number of curves to various formats: `Specfile`, `CSV`,
    `txt` or `npy`. All curves must have the same number of points and share
    the same ``x`` values.

    :param fname: Output file path, or file handle open in write mode.
        If ``fname`` is a path, file is opened in ``w`` mode. Existing file
        with a same name will be overwritten.
    :param x: 1D-Array (or list) of abscissa values.
    :param y: 2D-array (or list of lists) of ordinates values. First index
        is the curve index, second index is the sample index. The length
        of the second dimension (number of samples) must be equal to
        ``len(x)``. ``y`` can be a 1D-array in case there is only one curve
        to be saved.
    :param filetype: Filetype: ``"spec", "csv", "txt", "ndarray"``.
        If ``None``, filetype is detected from file name extension
        (``.dat, .csv, .txt, .npy``).
    :param xlabel: Abscissa label
    :param ylabels: List of `y` labels
    :param fmt: Format string for data. You can specify a short format
        string that defines a single format for both ``x`` and ``y`` values,
        or a list of two different format strings (e.g. ``["%d", "%.7g"]``).
        Default is ``"%.7g"``.
        This parameter does not apply to the `npy` format.
    :param csvdelim: String or character separating columns in `txt` and
        `CSV` formats. The user is responsible for ensuring that this
        delimiter is not used in data labels when writing a `CSV` file.
    :param newline: String or character separating lines/records in `txt`
        format (default is line break character ``\\n``).
    :param header: String that will be written at the beginning of the file in
        `txt` format.
    :param footer: String that will be written at the end of the file in `txt`
        format.
    :param comments: String that will be prepended to the ``header`` and
        ``footer`` strings, to mark them as comments. Default: ``#``.
    :param autoheader: In `CSV` or `txt`, ``True`` causes the first header
        line to be written as a standard CSV header line with column labels
        separated by the specified CSV delimiter.
    :raises IOError: If the file type is unsupported or cannot be inferred.

    When saving to Specfile format, each curve is saved as a separate scan
    with two data columns (``x`` and ``y``).

    `CSV` and `txt` formats are similar, except that the `txt` format allows
    user defined header and footer text blocks, whereas the `CSV` format has
    only a single header line with columns labels separated by field
    delimiters and no footer. The `txt` format also allows defining a record
    separator different from a line break.

    The `npy` format is written with ``numpy.save`` and can be read back with
    ``numpy.load``. If ``xlabel`` and ``ylabels`` are undefined, data is saved
    as a regular 2D ``numpy.ndarray`` (concatenation of ``x`` and ``y``). If
    both ``xlabel`` and ``ylabels`` are defined, the data is saved as a
    ``numpy.recarray`` after being transposed and having labels assigned to
    columns.
    """
    available_formats = ["spec", "csv", "txt", "ndarray"]

    if filetype is None:
        exttypes = {".dat": "spec",
                    ".csv": "csv",
                    ".txt": "txt",
                    ".npy": "ndarray"}
        outfname = (fname if not hasattr(fname, "name") else
                    fname.name)
        fileext = os.path.splitext(outfname)[1]
        if fileext in exttypes:
            filetype = exttypes[fileext]
        else:
            raise IOError("File type unspecified and could not be " +
                          "inferred from file extension (not in " +
                          "txt, dat, csv, npy)")
    else:
        filetype = filetype.lower()

    if filetype not in available_formats:
        raise IOError("File type %s is not supported" % (filetype))

    # default column headers
    if xlabel is None:
        xlabel = "x"
    if ylabels is None:
        if numpy.array(y).ndim > 1:
            ylabels = ["y%d" % i for i in range(len(y))]
        else:
            ylabels = ["y"]
    elif isinstance(ylabels, (list, tuple)):
        # if ylabels is provided as a list, every element must be a string;
        # non-string entries are replaced by a default "y<index>" label.
        # FIX: the index comes from enumerate() -- it was previously an
        # undefined name, raising NameError for any non-string label.
        ylabels = [ylabel if isinstance(ylabel, str) else "y%d" % i
                   for i, ylabel in enumerate(ylabels)]

    if filetype == "spec":
        # Check if we have regular data:
        ref = len(x)
        regular = True
        for one_y in y:
            regular &= len(one_y) == ref
        if regular:
            # FIX: pad the fmt list with copies of its last element
            # ([fmt[-1]] * k). The previous code appended a single string
            # fmt[-1] * k (string repetition), producing a list of the
            # wrong length and a bogus format entry.
            if isinstance(fmt, (list, tuple)) and len(fmt) < (len(ylabels) + 1):
                fmt = fmt + [fmt[-1]] * (1 + len(ylabels) - len(fmt))
            specf = savespec(fname, x, y, xlabel, ylabels, fmt=fmt,
                             scan_number=1, mode="w", write_file_header=True,
                             close_file=False)
        else:
            y_array = numpy.asarray(y)
            # make sure y_array is a 2D array even for a single curve
            if y_array.ndim == 1:
                y_array.shape = 1, -1
            elif y_array.ndim not in [1, 2]:
                raise IndexError("y must be a 1D or 2D array")

            # First curve
            specf = savespec(fname, x, y_array[0], xlabel, ylabels[0], fmt=fmt,
                             scan_number=1, mode="w", write_file_header=True,
                             close_file=False)
            # Other curves
            for i in range(1, y_array.shape[0]):
                specf = savespec(specf, x, y_array[i], xlabel, ylabels[i],
                                 fmt=fmt, scan_number=i + 1, mode="w",
                                 write_file_header=False, close_file=False)

        # close file if we created it
        if not hasattr(fname, "write"):
            specf.close()

    else:
        autoheader_line = xlabel + csvdelim + csvdelim.join(ylabels)
        if xlabel is not None and ylabels is not None and filetype == "csv":
            # csv format: optional single header line with labels, no footer
            if autoheader:
                header = autoheader_line + newline
            else:
                header = ""
            comments = ""
            footer = ""
            newline = "\n"
        elif filetype == "txt" and autoheader:
            # Comments string is added at the beginning of header string in
            # savetxt(). We add another one after the first header line and
            # before the rest of the header.
            if header:
                header = autoheader_line + newline + comments + header
            else:
                header = autoheader_line + newline

        # Concatenate x and y in a single 2D array
        X = numpy.vstack((x, y))

        if filetype in ["csv", "txt"]:
            X = X.transpose()
            savetxt(fname, X, fmt=fmt, delimiter=csvdelim,
                    newline=newline, header=header, footer=footer,
                    comments=comments)

        elif filetype == "ndarray":
            if xlabel is not None and ylabels is not None:
                labels = [xlabel] + ylabels

                # .transpose is needed here because recarray labels
                # apply to columns
                # (numpy.rec is the public alias of numpy.core.records)
                X = numpy.rec.fromrecords(X.transpose(), names=labels)
            numpy.save(fname, X)


def savetxt(fname, X, fmt="%.7g", delimiter=";", newline="\n",
            header="", footer="", comments="#"):
    """``numpy.savetxt`` backport of header and footer arguments from
    numpy=1.7.0.

    :param fname: Output file path, or file handle open in binary write mode.
    :param X: 2D data array (one row per record).
    :param fmt: Format string for data values.
    :param delimiter: Column separator.
    :param newline: Record separator.
    :param header: Text written before the data.
    :param footer: Text written after the data, prefixed by ``comments``.
    :param comments: Comment marker prepended to footer lines.

    See ``numpy.savetxt`` help:
    http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.savetxt.html
    """
    if not hasattr(fname, "name"):
        ffile = builtin_open(fname, 'wb')
    else:
        ffile = fname

    if header:
        # file is binary: encode the text header explicitly
        ffile.write(header.encode("utf-8"))

    numpy.savetxt(ffile, X, fmt, delimiter, newline)

    if footer:
        footer = (comments + footer.replace(newline, newline + comments) +
                  newline)
        ffile.write(footer.encode("utf-8"))

    # close the file only if we opened it ourselves
    if not hasattr(fname, "name"):
        ffile.close()


def savespec(specfile, x, y, xlabel="X", ylabel="Y", fmt="%.7g",
             scan_number=1, mode="w", write_file_header=True,
             close_file=False):
    """Saves one curve to a SpecFile.

    The curve is saved as a scan with two data columns. To save multiple
    curves to a single SpecFile, call this function for each curve by
    providing the same file handle each time.

    :param specfile: Output SpecFile name, or file handle open in write
        or append mode. If a file name is provided, a new file is open in
        write mode (existing file with the same name will be lost)
    :param x: 1D-Array (or list) of abscissa values
    :param y: 1D-array (or list), or list of them of ordinates values.
        All dataset must have the same length as x
    :param xlabel: Abscissa label (default ``"X"``)
    :param ylabel: Ordinate label, may be a list of labels when multiple
        curves are to be saved together.
    :param fmt: Format string for data. You can specify a short format
        string that defines a single format for both ``x`` and ``y`` values,
        or a list of two different format strings (e.g. ``["%d", "%.7g"]``).
        Default is ``"%.7g"``.
    :param scan_number: Scan number (default 1).
    :param mode: Mode for opening file: ``w`` (default), ``a``, ``r+``,
        ``w+``, ``a+``. This parameter is only relevant if ``specfile`` is a
        path.
    :param write_file_header: If ``True``, write a file header before writing
        the scan (``#F`` and ``#D`` line).
    :param close_file: If ``True``, close the file after saving curve.
    :return: ``None`` if ``close_file`` is ``True``, else return the file
        handle.
    :raises IndexError: If ``y`` is not 1D/2D or does not match ``len(x)``.
    :raises ValueError: If ``fmt`` does not match the number of columns.
    """
    # Make sure we use binary mode for write
    # (issue with windows: write() replaces \n with os.linesep in text mode)
    if "b" not in mode:
        first_letter = mode[0]
        assert first_letter in "rwa"
        mode = mode.replace(first_letter, first_letter + "b")

    x_array = numpy.asarray(x)
    y_array = numpy.asarray(y)
    if y_array.ndim > 2:
        raise IndexError("Y columns must have be packed as 1D")

    if y_array.shape[-1] != x_array.shape[0]:
        raise IndexError("X and Y columns must have the same length")

    if y_array.ndim == 2:
        assert isinstance(ylabel, (list, tuple))
        assert y_array.shape[0] == len(ylabel)
        labels = (xlabel, *ylabel)
    else:
        labels = (xlabel, ylabel)
    data = numpy.vstack((x_array, y_array))
    ncol = data.shape[0]
    assert len(labels) == ncol

    # FIX: removed a leftover debug print of the input arguments
    if isinstance(fmt, str) and fmt.count("%") == 1:
        # a single short format is replicated for every column
        full_fmt_string = " ".join([fmt] * ncol)
    elif isinstance(fmt, (list, tuple)) and len(fmt) == ncol:
        full_fmt_string = " ".join(fmt)
    else:
        raise ValueError("`fmt` must be a single format string or a list of " +
                         "format strings with as many format as ncolumns")

    if not hasattr(specfile, "write"):
        f = builtin_open(specfile, mode)
    else:
        f = specfile

    current_date = "#D %s" % (time.ctime(time.time()))
    if write_file_header:
        lines = ["#F %s" % f.name, current_date, ""]
    else:
        lines = [""]

    lines += ["#S %d %s" % (scan_number, labels[1]),
              current_date,
              "#N %d" % ncol,
              "#L " + " ".join(labels)]

    for row in data.T:
        lines.append(full_fmt_string % tuple(row))
    lines.append("")
    output = "\n".join(lines)
    f.write(output.encode())

    if close_file:
        f.close()
        return None
    return f
def h5ls(h5group, lvl=0):
    """Return a simple string representation of a HDF5 tree structure.

    :param h5group: Any :class:`h5py.Group` or :class:`h5py.File` instance,
        or a HDF5 file name
    :param lvl: Number of tabulations added to the group. ``lvl`` is
        incremented as we recursively process sub-groups.
    :return: String representation of an HDF5 tree structure

    Group names and dataset representation are printed preceded by a number of
    tabulations corresponding to their depth in the tree structure.
    Datasets are represented as :class:`h5py.Dataset` objects.

    Example::

        >>> print(h5ls("Downloads/sample.h5"))
        +fields
            +fieldB
                <HDF5 dataset "z": shape (256, 256), type "<f4">
            +fieldE
                <HDF5 dataset "x": shape (256, 256), type "<f4">
                <HDF5 dataset "y": shape (256, 256), type "<f4">

    .. note:: This function requires `h5py <http://www.h5py.org/>`_ to be
        installed.
    """
    if is_group(h5group):
        h5f = h5group
    elif isinstance(h5group, string_types):
        h5f = open(h5group)  # silx.io.open
    else:
        raise TypeError("h5group must be a hdf5-like group object or a file name.")

    indent = '\t' * lvl
    pieces = []
    for key in h5f.keys():
        member = h5f[key]
        if hasattr(member, 'keys'):
            # group: print its name, then recurse one level deeper
            pieces.append(indent + '+' + key + '\n')
            pieces.append(h5ls(member, lvl + 1))
        else:
            # dataset: rely on its repr
            pieces.append(indent + str(member) + '\n')

    # Only close the file when it was opened by this call
    if isinstance(h5group, string_types):
        h5f.close()

    return ''.join(pieces)


def _open_local_file(filename):
    """
    Load a file as an `h5py.File`-like object.

    Format supported:
    - h5 files, if `h5py` module is installed
    - SPEC files exposed as a NeXus layout
    - raster files exposed as a NeXus layout (if `fabio` is installed)
    - fio files exposed as a NeXus layout
    - Numpy files ('npy' and 'npz' files)

    The file is opened in read-only mode.

    :param str filename: A filename
    :raises: IOError if the file can't be loaded as an h5py.File like object
    :rtype: h5py.File
    """
    if not os.path.isfile(filename):
        raise IOError("Filename '%s' must be a file path" % filename)

    # Each failed attempt is recorded here and logged (debug level) at the end
    attempts = []
    try:
        _, extension = os.path.splitext(filename)

        if extension in (".npz", ".npy"):
            try:
                from . import rawh5
                return rawh5.NumpyFile(filename)
            except (IOError, ValueError):
                attempts.append((sys.exc_info(),
                                 "File '%s' can't be read as a numpy file." % filename))

        if h5py.is_hdf5(filename):
            try:
                return h5py.File(filename, "r")
            except OSError:
                # Retry assuming the file is being written to (SWMR mode)
                return h5py.File(filename, "r", libver='latest', swmr=True)

        try:
            from . import fabioh5
            return fabioh5.File(filename)
        except ImportError:
            attempts.append((sys.exc_info(), "fabioh5 can't be loaded."))
        except Exception:
            attempts.append((sys.exc_info(),
                             "File '%s' can't be read as fabio file." % filename))

        try:
            from . import spech5
            return spech5.SpecH5(filename)
        except ImportError:
            attempts.append((sys.exc_info(),
                             "spech5 can't be loaded."))
        except IOError:
            attempts.append((sys.exc_info(),
                             "File '%s' can't be read as spec file." % filename))

        try:
            from . import fioh5
            return fioh5.FioH5(filename)
        except IOError:
            attempts.append((sys.exc_info(),
                             "File '%s' can't be read as fio file." % filename))

    finally:
        for exc_info, message in attempts:
            logger.debug(message, exc_info=exc_info)

    raise IOError("File '%s' can't be read as HDF5" % filename)
+ """ + + def __init__(self, h5_node, h5_file): + super(_MainNode, self).__init__(h5_node) + self.__file = h5_file + self.__class = get_h5_class(h5_node) + + @property + def h5_class(self): + """Returns the HDF5 class which is mimicked by this class. + + :rtype: H5Type + """ + return self.__class + + @property + def h5py_class(self): + """Returns the h5py classes which is mimicked by this class. It can be + one of `h5py.File, h5py.Group` or `h5py.Dataset`. + + :rtype: h5py class + """ + return h5type_to_h5py_class(self.__class) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + """Close the file""" + self.__file.close() + self.__file = None + + +def open(filename): # pylint:disable=redefined-builtin + """ + Open a file as an `h5py`-like object. + + Format supported: + - h5 files, if `h5py` module is installed + - SPEC files exposed as a NeXus layout + - raster files exposed as a NeXus layout (if `fabio` is installed) + - fio files exposed as a NeXus layout + - Numpy files ('npy' and 'npz' files) + + The filename can be trailled an HDF5 path using the separator `::`. In this + case the object returned is a proxy to the target node, implementing the + `close` function and supporting `with` context. + + The file is opened in read-only mode. + + :param str filename: A filename which can containt an HDF5 path by using + `::` separator. 
+ :raises: IOError if the file can't be loaded or path can't be found + :rtype: h5py-like node + """ + url = silx.io.url.DataUrl(filename) + + if url.scheme() in [None, "file", "silx"]: + # That's a local file + if not url.is_valid(): + raise IOError("URL '%s' is not valid" % filename) + h5_file = _open_local_file(url.file_path()) + elif url.scheme() in ["fabio"]: + raise IOError("URL '%s' containing fabio scheme is not supported" % filename) + else: + # That's maybe an URL supported by h5pyd + uri = urllib.parse.urlparse(filename) + if h5pyd is None: + raise IOError("URL '%s' unsupported. Try to install h5pyd." % filename) + path = uri.path + endpoint = "%s://%s" % (uri.scheme, uri.netloc) + if path.startswith("/"): + path = path[1:] + return h5pyd.File(path, 'r', endpoint=endpoint) + + if url.data_slice(): + raise IOError("URL '%s' containing slicing is not supported" % filename) + + if url.data_path() in [None, "/", ""]: + # The full file is requested + return h5_file + else: + # Only a children is requested + if url.data_path() not in h5_file: + msg = "File '%s' does not contain path '%s'." % (filename, url.data_path()) + raise IOError(msg) + node = h5_file[url.data_path()] + proxy = _MainNode(node, h5_file) + return proxy + + +def _get_classes_type(): + """Returns a mapping between Python classes and HDF5 concepts. + + This function allow an lazy initialization to avoid recurssive import + of modules. + """ + global _CLASSES_TYPE + from . 
def _get_classes_type():
    """Return the cached mapping between Python classes and HDF5 concepts.

    The mapping is built lazily, on first use, to avoid a recursive import
    of modules.
    """
    global _CLASSES_TYPE
    from . import commonh5

    if _CLASSES_TYPE is not None:
        return _CLASSES_TYPE

    types = collections.OrderedDict()

    types[commonh5.Dataset] = H5Type.DATASET
    types[commonh5.File] = H5Type.FILE
    types[commonh5.Group] = H5Type.GROUP
    types[commonh5.SoftLink] = H5Type.SOFT_LINK

    types[h5py.Dataset] = H5Type.DATASET
    types[h5py.File] = H5Type.FILE
    types[h5py.Group] = H5Type.GROUP
    types[h5py.SoftLink] = H5Type.SOFT_LINK
    types[h5py.HardLink] = H5Type.HARD_LINK
    types[h5py.ExternalLink] = H5Type.EXTERNAL_LINK

    if h5pyd is not None:
        types[h5pyd.Dataset] = H5Type.DATASET
        types[h5pyd.File] = H5Type.FILE
        types[h5pyd.Group] = H5Type.GROUP
        types[h5pyd.SoftLink] = H5Type.SOFT_LINK
        types[h5pyd.HardLink] = H5Type.HARD_LINK
        types[h5pyd.ExternalLink] = H5Type.EXTERNAL_LINK

    _CLASSES_TYPE = types
    return _CLASSES_TYPE


def get_h5_class(obj=None, class_=None):
    """
    Returns the HDF5 type relative to the object or to the class.

    :param obj: Instance of an object
    :param class_: A class
    :rtype: H5Type
    """
    if class_ is None:
        class_ = obj.__class__

    classes = _get_classes_type()
    h5type = classes.get(class_)
    if h5type is not None:
        return h5type

    # Some h5py-like objects expose their HDF5 type directly
    if obj is not None and hasattr(obj, "h5_class"):
        return obj.h5_class

    # Fall back on a subclass lookup and cache the result
    # (including negative results, stored as None)
    for known_class, known_type in classes.items():
        if issubclass(class_, known_class):
            classes[class_] = known_type
            return known_type

    classes[class_] = None
    return None


def h5type_to_h5py_class(type_):
    """
    Returns an h5py class from an H5Type. None if nothing found.

    :param H5Type type_:
    :rtype: H5py class
    """
    mapping = {
        H5Type.FILE: h5py.File,
        H5Type.GROUP: h5py.Group,
        H5Type.DATASET: h5py.Dataset,
        H5Type.SOFT_LINK: h5py.SoftLink,
        H5Type.HARD_LINK: h5py.HardLink,
        H5Type.EXTERNAL_LINK: h5py.ExternalLink,
    }
    return mapping.get(type_)


def get_h5py_class(obj):
    """Returns the h5py class from an object.

    If it is an h5py object or an h5py-like object, an h5py class is returned.
    If the object is not an h5py-like object, None is returned.

    :param obj: An object
    :return: An h5py object
    """
    if hasattr(obj, "h5py_class"):
        return obj.h5py_class
    return h5type_to_h5py_class(get_h5_class(obj))


def is_file(obj):
    """
    True is the object is an h5py.File-like object.

    :param obj: An object
    """
    return get_h5_class(obj) == H5Type.FILE


def is_group(obj):
    """
    True if the object is a h5py.Group-like object. A file is a group.

    :param obj: An object
    """
    return get_h5_class(obj) in (H5Type.GROUP, H5Type.FILE)


def is_dataset(obj):
    """
    True if the object is a h5py.Dataset-like object.

    :param obj: An object
    """
    return get_h5_class(obj) == H5Type.DATASET


def is_softlink(obj):
    """
    True if the object is a h5py.SoftLink-like object.

    :param obj: An object
    """
    return get_h5_class(obj) == H5Type.SOFT_LINK


def is_externallink(obj):
    """
    True if the object is a h5py.ExternalLink-like object.

    :param obj: An object
    """
    return get_h5_class(obj) == H5Type.EXTERNAL_LINK


def is_link(obj):
    """
    True if the object is a h5py link-like object.

    :param obj: An object
    """
    return get_h5_class(obj) in {H5Type.SOFT_LINK, H5Type.EXTERNAL_LINK}
def _visitall(item, path=''):
    """Helper function for :func:`visitall`.

    Recursively yields ``(relative path, object)`` for every child of
    ``item``, yielding the link object itself (not its target) for soft and
    external links.

    :param item: Item to visit
    :param str path: Relative path of the item
    """
    if not is_group(item):
        return

    for name, child_item in item.items():
        if isinstance(child_item, (h5py.Group, h5py.Dataset)):
            # Resolvable child: fetch the link object to detect soft/external links
            link = item.get(name, getlink=True)
        else:
            # Unresolvable children are already returned as link objects
            link = child_item
        child_path = '/'.join((path, name))

        ret = link if link is not None and is_link(link) else child_item
        yield child_path, ret
        yield from _visitall(child_item, child_path)


def visitall(item):
    """Visit entity recursively including links.

    It does not follow links.
    This is a generator yielding (relative path, object) for visited items.

    :param item: The item to visit.
    """
    yield from _visitall(item, '')


def get_data(url):
    """Returns a numpy data from an URL.

    Examples:

    >>> # 1st frame from an EDF using silx.io.open
    >>> data = silx.io.get_data("silx:/users/foo/image.edf::/scan_0/instrument/detector_0/data[0]")

    >>> # 1st frame from an EDF using fabio
    >>> data = silx.io.get_data("fabio:/users/foo/image.edf::[0]")

    Yet 2 schemes are supported by the function.

    - If `silx` scheme is used, the file is opened using
      :meth:`silx.io.open`
      and the data is reach using usually NeXus paths.
    - If `fabio` scheme is used, the file is opened using :meth:`fabio.open`
      from the FabIO library.
      No data path have to be specified, but each frames can be accessed
      using the data slicing.
      This shortcut of :meth:`silx.io.open` allow to have a faster access to
      the data.

    .. seealso:: :class:`silx.io.url.DataUrl`

    :param Union[str,silx.io.url.DataUrl]: A data URL
    :rtype: Union[numpy.ndarray, numpy.generic]
    :raises ImportError: If the mandatory library to read the file is not
        available.
    :raises ValueError: If the URL is not valid or do not match the data
    :raises IOError: If the file is not found or in case of internal error of
        :meth:`fabio.open` or :meth:`silx.io.open`. In this last case more
        informations are displayed in debug mode.
    """
    if not isinstance(url, silx.io.url.DataUrl):
        url = silx.io.url.DataUrl(url)

    if not url.is_valid():
        raise ValueError("URL '%s' is not valid" % url.path())

    if not os.path.exists(url.file_path()):
        raise IOError("File '%s' not found" % url.file_path())

    if url.scheme() == "silx":
        data_path = url.data_path()
        data_slice = url.data_slice()

        with open(url.file_path()) as h5:
            if data_path not in h5:
                raise ValueError("Data path from URL '%s' not found" % url.path())
            data = h5[data_path]

            if not silx.io.is_dataset(data):
                raise ValueError("Data path from URL '%s' is not a dataset" % url.path())

            if data_slice is not None:
                data = h5py_read_dataset(data, index=data_slice)
            else:
                # works for scalar and array
                data = h5py_read_dataset(data)

    elif url.scheme() == "fabio":
        import fabio
        data_slice = url.data_slice()
        if data_slice is None:
            # No slice in the URL: default to the first frame
            data_slice = (0,)
        if len(data_slice) != 1:
            raise ValueError("Fabio slice expect a single frame, but %s found" % data_slice)
        index = data_slice[0]
        if not isinstance(index, int):
            raise ValueError("Fabio slice expect a single integer, but %s found" % data_slice)

        try:
            fabio_file = fabio.open(url.file_path())
        except Exception:
            logger.debug("Error while opening %s with fabio", url.file_path(), exc_info=True)
            raise IOError("Error while opening %s with fabio (use debug for more information)" % url.path())

        if fabio_file.nframes == 1:
            if index != 0:
                raise ValueError("Only a single frame available. Slice %s out of range" % index)
            data = fabio_file.data
        else:
            data = fabio_file.getframe(index).data

        # There is no explicit close
        fabio_file = None

    else:
        raise ValueError("Scheme '%s' not supported" % url.scheme())

    return data
def rawfile_to_h5_external_dataset(bin_file, output_url, shape, dtype,
                                   overwrite=False):
    """
    Create a HDF5 dataset at `output_url` pointing to the given raw file.

    :param str bin_file: Path to the raw data file
    :param DataUrl output_url: HDF5 URL where to save the external dataset
    :param tuple shape: Shape of the volume
    :param numpy.dtype dtype: Data type of the volume elements
    :param bool overwrite: True to allow overwriting (default: False).
    :raises ValueError: If the data path already exists and overwrite is False
    """
    assert isinstance(output_url, silx.io.url.DataUrl)
    assert isinstance(shape, (tuple, list))
    # External datasets require h5py >= 2.9
    v_majeur, v_mineur, v_micro = [int(i) for i in h5py.version.version.split('.')[:3]]
    if calc_hexversion(v_majeur, v_mineur, v_micro) < calc_hexversion(2, 9, 0):
        raise Exception('h5py >= 2.9 should be installed to access the '
                        'external feature.')

    with h5py.File(output_url.file_path(), mode="a") as _h5_file:
        if output_url.data_path() in _h5_file:
            if overwrite is False:
                raise ValueError('data_path already exists')
            else:
                logger.warning('will overwrite path %s' % output_url.data_path())
                del _h5_file[output_url.data_path()]
        external = [(bin_file, 0, h5py.h5f.UNLIMITED)]
        _h5_file.create_dataset(output_url.data_path(),
                                shape,
                                dtype=dtype,
                                external=external)


def vol_to_h5_external_dataset(vol_file, output_url, info_file=None,
                               vol_dtype=numpy.float32, overwrite=False):
    """
    Create a HDF5 dataset at `output_url` pointing to the given vol_file.

    If the vol_file.info containing the shape is not on the same folder as the
    vol-file then you should specify her location.

    :param str vol_file: Path to the .vol file
    :param DataUrl output_url: HDF5 URL where to save the external dataset
    :param Union[str,None] info_file:
        .vol.info file name written by pyhst and containing the shape
        information. Defaults to ``vol_file + '.info'``.
    :param numpy.dtype vol_dtype: Data type of the volume elements (default: float32)
    :param bool overwrite: True to allow overwriting (default: False).
    :raises ValueError: If fails to read shape from the .vol.info file
    """
    _info_file = info_file
    if _info_file is None:
        _info_file = vol_file + '.info'
        if not os.path.exists(_info_file):
            logger.error('info_file not given and %s does not exist, please '
                         'specify .vol.info file' % _info_file)
            return

    def info_file_to_dict():
        """Parse the resolved .vol.info file into a key/value dict."""
        ddict = {}
        # Bug fix: open the resolved `_info_file`, not the raw `info_file`
        # argument which may be None when the default path is used.
        with builtin_open(_info_file, "r") as _file:
            lines = _file.readlines()
            for line in lines:
                if '=' not in line:
                    continue
                # Strip whitespace and trailing comments before splitting
                stripped = line.rstrip().replace(' ', '')
                stripped = stripped.split('#')[0]
                key, value = stripped.split('=')
                ddict[key.lower()] = value
        return ddict

    ddict = info_file_to_dict()
    if 'num_x' not in ddict or 'num_y' not in ddict or 'num_z' not in ddict:
        raise ValueError(
            'Unable to retrieve volume shape from %s' % _info_file)

    dimX = int(ddict['num_x'])
    dimY = int(ddict['num_y'])
    dimZ = int(ddict['num_z'])
    shape = (dimZ, dimY, dimX)

    return rawfile_to_h5_external_dataset(bin_file=vol_file,
                                          output_url=output_url,
                                          shape=shape,
                                          dtype=vol_dtype,
                                          overwrite=overwrite)


def h5py_decode_value(value, encoding="utf-8", errors="surrogateescape"):
    """Keep bytes when value cannot be decoded

    :param value: bytes or array of bytes
    :param encoding str:
    :param errors str:
    """
    try:
        if numpy.isscalar(value):
            return value.decode(encoding, errors=errors)
        str_item = [b.decode(encoding, errors=errors) for b in value.flat]
        return numpy.array(str_item, dtype=object).reshape(value.shape)
    except UnicodeDecodeError:
        return value


def h5py_encode_value(value, encoding="utf-8", errors="surrogateescape"):
    """Keep string when value cannot be encoding

    :param value: string or array of strings
    :param encoding str:
    :param errors str:
    """
    try:
        if numpy.isscalar(value):
            return value.encode(encoding, errors=errors)
        bytes_item = [s.encode(encoding, errors=errors) for s in value.flat]
        return numpy.array(bytes_item, dtype=object).reshape(value.shape)
    except UnicodeEncodeError:
        return value
class H5pyDatasetReadWrapper:
    """Wrapper to handle H5T_STRING decoding on-the-fly when reading
    a dataset. Uniform behaviour for h5py 2.x and h5py 3.x

    h5py abuses H5T_STRING with ASCII character set
    to store `bytes`: dset[()] = b"..."
    Therefore an H5T_STRING with ASCII encoding is not decoded by default.
    """

    # h5py < 3 decodes non-ASCII strings on its own
    H5PY_AUTODECODE_NONASCII = int(h5py.version.version.split(".")[0]) < 3

    def __init__(self, dset, decode_ascii=False):
        """
        :param h5py.Dataset dset:
        :param bool decode_ascii:
        """
        self._dset = dset
        self._encoding = self._select_encoding(dset, decode_ascii)

    @classmethod
    def _select_encoding(cls, dset, decode_ascii):
        """Choose the encoding to apply when reading, or None when values
        must be returned as stored."""
        encoding = cls._dataset_encoding(dset)
        if encoding == "ascii" and not decode_ascii:
            encoding = None
        if encoding != "ascii" and cls.H5PY_AUTODECODE_NONASCII:
            # Decoding is already done by the h5py library
            encoding = None
        if encoding == "ascii":
            # ASCII can be decoded as UTF-8
            encoding = "utf-8"
        return encoding

    @staticmethod
    def _dataset_encoding(dset):
        """Return the H5T_STRING character set of the dataset ("ascii" or
        "utf-8"), or None when the dataset is not an H5T_STRING."""
        try:
            string_info = h5py.h5t.check_string_dtype(dset.dtype)
        except AttributeError:
            # h5py < 2.10
            try:
                cset = dset.id.get_type().get_cset()
            except AttributeError:
                # Not an H5T_STRING
                return None
            return ["ascii", "utf-8"][cset]
        # h5py >= 2.10
        try:
            return string_info.encoding
        except AttributeError:
            # Not an H5T_STRING
            return None

    def __getitem__(self, args):
        value = self._dset[args]
        if self._encoding:
            return h5py_decode_value(value, encoding=self._encoding)
        return value


class H5pyAttributesReadWrapper:
    """Wrapper to handle H5T_STRING decoding on-the-fly when reading
    an attribute. Uniform behaviour for h5py 2.x and h5py 3.x

    h5py abuses H5T_STRING with ASCII character set
    to store `bytes`: dset[()] = b"..."
    Therefore an H5T_STRING with ASCII encoding is not decoded by default.
    """

    # h5py >= 3 decodes strings on its own
    H5PY_AUTODECODE = int(h5py.version.version.split(".")[0]) >= 3

    def __init__(self, attrs, decode_ascii=False):
        """
        :param h5py.AttributeManager attrs:
        :param bool decode_ascii:
        """
        self._attrs = attrs
        self._decode_ascii = decode_ascii

    def __getitem__(self, args):
        value = self._attrs[args]

        # Determine the attribute's character set (if it is an H5T_STRING)
        try:
            dtype = self._attrs.get_id(args).dtype
        except AttributeError:
            # h5py < 2.10
            attr_id = h5py.h5a.open(self._attrs._id, self._attrs._e(args))
            try:
                cset = attr_id.get_type().get_cset()
            except AttributeError:
                # Not an H5T_STRING
                return value
            encoding = ["ascii", "utf-8"][cset]
        else:
            # h5py >= 2.10
            try:
                encoding = h5py.h5t.check_string_dtype(dtype).encoding
            except AttributeError:
                # Not an H5T_STRING
                return value

        stored_as_ascii = encoding == "ascii"
        if self.H5PY_AUTODECODE:
            if stored_as_ascii and not self._decode_ascii:
                # Undo decoding by the h5py library
                return h5py_encode_value(value, encoding="utf-8")
        else:
            if stored_as_ascii and self._decode_ascii:
                # Decode ASCII as UTF-8 for consistency
                return h5py_decode_value(value, encoding="utf-8")

        # Decoding is already done by the h5py library
        return value

    def items(self):
        """Yield ``(name, value)`` for every attribute, applying the same
        decoding policy as item access."""
        for key in self._attrs.keys():
            yield key, self[key]


def h5py_read_dataset(dset, index=tuple(), decode_ascii=False):
    """Read data from dataset object. UTF-8 strings will be
    decoded while ASCII strings will only be decoded when
    `decode_ascii=True`.

    :param h5py.Dataset dset:
    :param index: slicing (all by default)
    :param bool decode_ascii:
    """
    wrapper = H5pyDatasetReadWrapper(dset, decode_ascii=decode_ascii)
    return wrapper[index]


def h5py_read_attribute(attrs, name, decode_ascii=False):
    """Read data from attributes. UTF-8 strings will be
    decoded while ASCII strings will only be decoded when
    `decode_ascii=True`.

    :param h5py.AttributeManager attrs:
    :param str name: attribute name
    :param bool decode_ascii:
    """
    wrapper = H5pyAttributesReadWrapper(attrs, decode_ascii=decode_ascii)
    return wrapper[name]


def h5py_read_attributes(attrs, decode_ascii=False):
    """Read data from attributes. UTF-8 strings will be
    decoded while ASCII strings will only be decoded when
    `decode_ascii=True`.

    :param h5py.AttributeManager attrs:
    :param bool decode_ascii:
    """
    wrapper = H5pyAttributesReadWrapper(attrs, decode_ascii=decode_ascii)
    return dict(wrapper.items())