summaryrefslogtreecommitdiff
path: root/silx/io/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/io/utils.py')
-rw-r--r--silx/io/utils.py419
1 files changed, 320 insertions, 99 deletions
diff --git a/silx/io/utils.py b/silx/io/utils.py
index 361a28b..be19fdb 100644
--- a/silx/io/utils.py
+++ b/silx/io/utils.py
@@ -1,6 +1,6 @@
# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2018 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -23,36 +23,88 @@
# ############################################################################*/
""" I/O utility functions"""
+__authors__ = ["P. Knobel", "V. Valls"]
+__license__ = "MIT"
+__date__ = "14/02/2018"
+
import numpy
import os.path
import sys
import time
import logging
+import collections
-from silx.utils.deprecation import deprecated
from silx.utils.proxy import Proxy
+from silx.third_party import six
+from silx.third_party import enum
+import silx.io.url
try:
import h5py
except ImportError as e:
- h5py_missing = True
+ h5py = None
h5py_import_error = e
-else:
- h5py_missing = False
-
-__authors__ = ["P. Knobel", "V. Valls"]
-__license__ = "MIT"
-__date__ = "28/09/2017"
+try:
+ import h5pyd
+except ImportError as e:
+ h5pyd = None
+ h5py_import_error = e
logger = logging.getLogger(__name__)
+
+class H5Type(enum.Enum):
+ """Identify a set of HDF5 concepts"""
+ DATASET = 1
+ GROUP = 2
+ FILE = 3
+ SOFT_LINK = 4
+ EXTERNAL_LINK = 5
+ HARD_LINK = 6
+
+
+_CLASSES_TYPE = None
+"""Store mapping between classes and types"""
+
string_types = (basestring,) if sys.version_info[0] == 2 else (str,) # noqa
builtin_open = open
+def supported_extensions(flat_formats=True):
+ """Returns the list of file extensions supported by `silx.open`.
+
+ The result filters out formats whose required module is not available.
+
+ :param bool flat_formats: If true, also include flat formats like npy or
+ edf (if the expected module is available)
+ :returns: A dictionary indexed by file description and containing a set of
+ extensions (an extension is a string like "\*.ext").
+ :rtype: Dict[str, Set[str]]
+ """
+ formats = {}
+ if h5py is not None:
+ formats["HDF5 files"] = set(["*.h5", "*.hdf"])
+ formats["NeXus files"] = set(["*.nx", "*.nxs", "*.h5", "*.hdf"])
+ formats["NeXus layout from spec files"] = set(["*.dat", "*.spec", "*.mca"])
+ if flat_formats:
+ try:
+ from silx.io import fabioh5
+ except ImportError:
+ fabioh5 = None
+ if fabioh5 is not None:
+ formats["NeXus layout from fabio files"] = set(fabioh5.supported_extensions())
+
+ extensions = ["*.npz"]
+ if flat_formats:
+ extensions.append("*.npy")
+
+ formats["Numpy binary files"] = set(extensions)
+ return formats
+
+
def save1D(fname, x, y, xlabel=None, ylabels=None, filetype=None,
fmt="%.7g", csvdelim=";", newline="\n", header="",
footer="", comments="#", autoheader=False):
@@ -349,7 +401,7 @@ def h5ls(h5group, lvl=0):
.. note:: This function requires `h5py <http://www.h5py.org/>`_ to be
installed.
"""
- if h5py_missing:
+ if h5py is None:
logger.error("h5ls requires h5py")
raise h5py_import_error
@@ -379,7 +431,7 @@ def h5ls(h5group, lvl=0):
return h5repr
-def _open(filename):
+def _open_local_file(filename):
"""
Load a file as an `h5py.File`-like object.
@@ -399,42 +451,43 @@ def _open(filename):
raise IOError("Filename '%s' must be a file path" % filename)
debugging_info = []
+ try:
+ _, extension = os.path.splitext(filename)
- _, extension = os.path.splitext(filename)
+ if extension in [".npz", ".npy"]:
+ try:
+ from . import rawh5
+ return rawh5.NumpyFile(filename)
+ except (IOError, ValueError) as e:
+ debugging_info.append((sys.exc_info(),
+ "File '%s' can't be read as a numpy file." % filename))
- if not h5py_missing:
- if h5py.is_hdf5(filename):
- return h5py.File(filename, "r")
+ if h5py is not None:
+ if h5py.is_hdf5(filename):
+ return h5py.File(filename, "r")
- if extension in [".npz", ".npy"]:
try:
- from . import rawh5
- return rawh5.NumpyFile(filename)
- except (IOError, ValueError) as e:
+ from . import fabioh5
+ return fabioh5.File(filename)
+ except ImportError:
+ debugging_info.append((sys.exc_info(), "fabioh5 can't be loaded."))
+ except Exception:
debugging_info.append((sys.exc_info(),
- "File '%s' can't be read as a numpy file." % filename))
+ "File '%s' can't be read as fabio file." % filename))
- try:
- from . import fabioh5
- return fabioh5.File(filename)
- except ImportError:
- debugging_info.append((sys.exc_info(), "fabioh5 can't be loaded."))
- except Exception:
- debugging_info.append((sys.exc_info(),
- "File '%s' can't be read as fabio file." % filename))
+ try:
+ from . import spech5
+ return spech5.SpecH5(filename)
+ except ImportError:
+ debugging_info.append((sys.exc_info(),
+ "spech5 can't be loaded."))
+ except IOError:
+ debugging_info.append((sys.exc_info(),
+ "File '%s' can't be read as spec file." % filename))
+ finally:
+ for exc_info, message in debugging_info:
+ logger.debug(message, exc_info=exc_info)
- try:
- from . import spech5
- return spech5.SpecH5(filename)
- except ImportError:
- debugging_info.append((sys.exc_info(),
- "spech5 can't be loaded."))
- except IOError:
- debugging_info.append((sys.exc_info(),
- "File '%s' can't be read as spec file." % filename))
-
- for exc_info, message in debugging_info:
- logger.debug(message, exc_info=exc_info)
raise IOError("File '%s' can't be read as HDF5" % filename)
@@ -452,16 +505,24 @@ class _MainNode(Proxy):
def __init__(self, h5_node, h5_file):
super(_MainNode, self).__init__(h5_node)
self.__file = h5_file
- self.__class = get_h5py_class(h5_node)
+ self.__class = get_h5_class(h5_node)
+
+ @property
+ def h5_class(self):
+ """Returns the HDF5 class which is mimicked by this class.
+
+ :rtype: H5Type
+ """
+ return self.__class
@property
def h5py_class(self):
"""Returns the h5py classes which is mimicked by this class. It can be
one of `h5py.File, h5py.Group` or `h5py.Dataset`.
- :rtype: Class
+ :rtype: h5py class
"""
- return self.__class
+ return h5type_to_h5py_class(self.__class)
def __enter__(self):
return self
@@ -496,44 +557,129 @@ def open(filename): # pylint:disable=redefined-builtin
:raises: IOError if the file can't be loaded or path can't be found
:rtype: h5py-like node
"""
- if "::" in filename:
- filename, h5_path = filename.split("::")
+ url = silx.io.url.DataUrl(filename)
+
+ if url.scheme() in [None, "file", "silx"]:
+ # That's a local file
+ if not url.is_valid():
+ raise IOError("URL '%s' is not valid" % filename)
+ h5_file = _open_local_file(url.file_path())
+ elif url.scheme() in ["fabio"]:
+ raise IOError("URL '%s' containing fabio scheme is not supported" % filename)
else:
- filename, h5_path = filename, "/"
+ # This may be a URL supported by h5pyd
+ uri = six.moves.urllib.parse.urlparse(filename)
+ if h5pyd is None:
+ raise IOError("URL '%s' unsupported. Try to install h5pyd." % filename)
+ path = uri.path
+ endpoint = "%s://%s" % (uri.scheme, uri.netloc)
+ if path.startswith("/"):
+ path = path[1:]
+ return h5pyd.File(path, 'r', endpoint=endpoint)
+
+ if url.data_slice():
+ raise IOError("URL '%s' containing slicing is not supported" % filename)
+
+ if url.data_path() in [None, "/", ""]:
+ # The full file is requested
+ return h5_file
+ else:
+ # Only a child node is requested
+ if url.data_path() not in h5_file:
+ msg = "File '%s' does not contain path '%s'." % (filename, url.data_path())
+ raise IOError(msg)
+ node = h5_file[url.data_path()]
+ proxy = _MainNode(node, h5_file)
+ return proxy
- h5_file = _open(filename)
- if h5_path in ["/", ""]:
- # Short cut
- return h5_file
+def _get_classes_type():
+ """Returns a mapping between Python classes and HDF5 concepts.
+
+ This function allows lazy initialization to avoid recursive import
+ of modules.
+ """
+ global _CLASSES_TYPE
+ from . import commonh5
+
+ if _CLASSES_TYPE is not None:
+ return _CLASSES_TYPE
- if h5_path not in h5_file:
- msg = "File '%s' do not contains path '%s'." % (filename, h5_path)
- raise IOError(msg)
+ _CLASSES_TYPE = collections.OrderedDict()
- node = h5_file[h5_path]
- proxy = _MainNode(node, h5_file)
- return proxy
+ _CLASSES_TYPE[commonh5.Dataset] = H5Type.DATASET
+ _CLASSES_TYPE[commonh5.File] = H5Type.FILE
+ _CLASSES_TYPE[commonh5.Group] = H5Type.GROUP
+ _CLASSES_TYPE[commonh5.SoftLink] = H5Type.SOFT_LINK
+ if h5py is not None:
+ _CLASSES_TYPE[h5py.Dataset] = H5Type.DATASET
+ _CLASSES_TYPE[h5py.File] = H5Type.FILE
+ _CLASSES_TYPE[h5py.Group] = H5Type.GROUP
+ _CLASSES_TYPE[h5py.SoftLink] = H5Type.SOFT_LINK
+ _CLASSES_TYPE[h5py.HardLink] = H5Type.HARD_LINK
+ _CLASSES_TYPE[h5py.ExternalLink] = H5Type.EXTERNAL_LINK
-@deprecated
-def load(filename):
+ if h5pyd is not None:
+ _CLASSES_TYPE[h5pyd.Dataset] = H5Type.DATASET
+ _CLASSES_TYPE[h5pyd.File] = H5Type.FILE
+ _CLASSES_TYPE[h5pyd.Group] = H5Type.GROUP
+ _CLASSES_TYPE[h5pyd.SoftLink] = H5Type.SOFT_LINK
+ _CLASSES_TYPE[h5pyd.HardLink] = H5Type.HARD_LINK
+ _CLASSES_TYPE[h5pyd.ExternalLink] = H5Type.EXTERNAL_LINK
+
+ return _CLASSES_TYPE
+
+
+def get_h5_class(obj=None, class_=None):
"""
- Load a file as an `h5py.File`-like object.
+ Returns the HDF5 type relative to the object or to the class.
- Format supported:
- - h5 files, if `h5py` module is installed
- - Spec files if `SpecFile` module is installed
+ :param obj: Instance of an object
+ :param class_: A class
+ :rtype: H5Type
+ """
+ if class_ is None:
+ class_ = obj.__class__
- .. deprecated:: 0.4
- Use :meth:`open`, or :meth:`silx.io.open`. Will be removed in
- Silx 0.5.
+ classes = _get_classes_type()
+ t = classes.get(class_, None)
+ if t is not None:
+ return t
- :param str filename: A filename
- :raises: IOError if the file can't be loaded as an h5py.File like object
- :rtype: h5py.File
+ if obj is not None:
+ if hasattr(obj, "h5_class"):
+ return obj.h5_class
+
+ for referencedClass_, type_ in classes.items():
+ if issubclass(class_, referencedClass_):
+ classes[class_] = type_
+ return type_
+
+ classes[class_] = None
+ return None
+
+
+def h5type_to_h5py_class(type_):
+ """
+ Returns an h5py class from an H5Type. None if nothing found.
+
+ :param H5Type type_:
+ :rtype: H5py class
"""
- return open(filename)
+ if type_ == H5Type.FILE:
+ return h5py.File
+ if type_ == H5Type.GROUP:
+ return h5py.Group
+ if type_ == H5Type.DATASET:
+ return h5py.Dataset
+ if type_ == H5Type.SOFT_LINK:
+ return h5py.SoftLink
+ if type_ == H5Type.HARD_LINK:
+ return h5py.HardLink
+ if type_ == H5Type.EXTERNAL_LINK:
+ return h5py.ExternalLink
+ return None
def get_h5py_class(obj):
@@ -545,12 +691,13 @@ def get_h5py_class(obj):
:param obj: An object
:return: An h5py object
"""
+ if h5py is None:
+ logger.error("get_h5py_class/is_file/is_group/is_dataset requires h5py")
+ raise h5py_import_error
if hasattr(obj, "h5py_class"):
return obj.h5py_class
- elif isinstance(obj, (h5py.File, h5py.Group, h5py.Dataset, h5py.SoftLink)):
- return obj.__class__
- else:
- return None
+ type_ = get_h5_class(obj)
+ return h5type_to_h5py_class(type_)
def is_file(obj):
@@ -559,22 +706,18 @@ def is_file(obj):
:param obj: An object
"""
- class_ = get_h5py_class(obj)
- if class_ is None:
- return False
- return issubclass(class_, h5py.File)
+ t = get_h5_class(obj)
+ return t == H5Type.FILE
def is_group(obj):
"""
- True if the object is a h5py.Group-like object.
+ True if the object is a h5py.Group-like object. A file is a group.
:param obj: An object
"""
- class_ = get_h5py_class(obj)
- if class_ is None:
- return False
- return issubclass(class_, h5py.Group)
+ t = get_h5_class(obj)
+ return t in [H5Type.GROUP, H5Type.FILE]
def is_dataset(obj):
@@ -583,10 +726,8 @@ def is_dataset(obj):
:param obj: An object
"""
- class_ = get_h5py_class(obj)
- if class_ is None:
- return False
- return issubclass(class_, h5py.Dataset)
+ t = get_h5_class(obj)
+ return t == H5Type.DATASET
def is_softlink(obj):
@@ -595,19 +736,99 @@ def is_softlink(obj):
:param obj: An object
"""
- class_ = get_h5py_class(obj)
- if class_ is None:
- return False
- return issubclass(class_, h5py.SoftLink)
+ t = get_h5_class(obj)
+ return t == H5Type.SOFT_LINK
-if h5py_missing:
- def raise_h5py_missing(obj):
- logger.error("get_h5py_class/is_file/is_group/is_dataset requires h5py")
- raise h5py_import_error
+def get_data(url):
+ """Returns numpy data from a URL.
+
+ Examples:
+
+ >>> # 1st frame from an EDF using silx.io.open
+ >>> data = silx.io.get_data("silx:/users/foo/image.edf::/scan_0/instrument/detector_0/data[0]")
+
+ >>> # 1st frame from an EDF using fabio
+ >>> data = silx.io.get_data("fabio:/users/foo/image.edf::[0]")
+
+ Currently, two schemes are supported by the function.
+
+ - If `silx` scheme is used, the file is opened using
+ :meth:`silx.io.open`
+ and the data is usually reached using NeXus paths.
+ - If `fabio` scheme is used, the file is opened using :meth:`fabio.open`
+ from the FabIO library.
+ No data path has to be specified, but each frame can be accessed
+ using the data slicing.
+ This shortcut of :meth:`silx.io.open` allows faster access to
+ the data.
+
+ .. seealso:: :class:`silx.io.url.DataUrl`
+
+ :param Union[str,silx.io.url.DataUrl] url: A data URL
+ :rtype: Union[numpy.ndarray, numpy.generic]
+ :raises ImportError: If the mandatory library to read the file is not
+ available.
+ :raises ValueError: If the URL is not valid or does not match the data
+ :raises IOError: If the file is not found or in case of internal error of
+ :meth:`fabio.open` or :meth:`silx.io.open`. In the latter case more
+ information is displayed in debug mode.
+ """
+ if not isinstance(url, silx.io.url.DataUrl):
+ url = silx.io.url.DataUrl(url)
+
+ if not url.is_valid():
+ raise ValueError("URL '%s' is not valid" % url.path())
+
+ if not os.path.exists(url.file_path()):
+ raise IOError("File '%s' not found" % url.file_path())
+
+ if url.scheme() == "silx":
+ data_path = url.data_path()
+ data_slice = url.data_slice()
+
+ with open(url.file_path()) as h5:
+ if data_path not in h5:
+ raise ValueError("Data path from URL '%s' not found" % url.path())
+ data = h5[data_path]
+
+ if not silx.io.is_dataset(data):
+ raise ValueError("Data path from URL '%s' is not a dataset" % url.path())
+
+ if data_slice is not None:
+ data = data[data_slice]
+ else:
+ # works for scalar and array
+ data = data[()]
+
+ elif url.scheme() == "fabio":
+ import fabio
+ data_slice = url.data_slice()
+ if data_slice is None:
+ data_slice = (0, )
+ if data_slice is None or len(data_slice) != 1:
+ raise ValueError("Fabio slice expect a single frame, but %s found" % data_slice)
+ index = data_slice[0]
+ if not isinstance(index, int):
+ raise ValueError("Fabio slice expect a single integer, but %s found" % data_slice)
+
+ try:
+ fabio_file = fabio.open(url.file_path())
+ except Exception:
+ logger.debug("Error while opening %s with fabio", url.file_path(), exc_info=True)
+ raise IOError("Error while opening %s with fabio (use debug for more information)" % url.path())
+
+ if fabio_file.nframes == 1:
+ if index != 0:
+ raise ValueError("Only a single frame availalbe. Slice %s out of range" % index)
+ data = fabio_file.data
+ else:
+ data = fabio_file.getframe(index).data
+
+ # There is no explicit close
+ fabio_file = None
+
+ else:
+ raise ValueError("Scheme '%s' not supported" % url.scheme())
- get_h5py_class = raise_h5py_missing
- is_file = raise_h5py_missing
- is_group = raise_h5py_missing
- is_dataset = raise_h5py_missing
- is_softlink = raise_h5py_missing
+ return data