summaryrefslogtreecommitdiff
path: root/silx/io
diff options
context:
space:
mode:
Diffstat (limited to 'silx/io')
-rw-r--r--silx/io/commonh5.py25
-rw-r--r--silx/io/fabioh5.py75
-rw-r--r--silx/io/test/test_fabioh5.py103
3 files changed, 183 insertions, 20 deletions
diff --git a/silx/io/commonh5.py b/silx/io/commonh5.py
index 0f5ac02..02c4181 100644
--- a/silx/io/commonh5.py
+++ b/silx/io/commonh5.py
@@ -37,7 +37,7 @@ from .utils import is_dataset
__authors__ = ["V. Valls", "P. Knobel"]
__license__ = "MIT"
-__date__ = "21/09/2017"
+__date__ = "02/10/2017"
class _MappingProxyType(collections.MutableMapping):
@@ -190,14 +190,29 @@ class Dataset(Node):
def _check_data(self, data):
"""Check that the data provided by the dataset is valid.
- :param numpy.ndarray data: Data associated to the dataset
+ It is valid when it can be stored in an HDF5 file using h5py.
+ :param numpy.ndarray data: Data associated to the dataset
:raises TypeError: In the case the data is not valid.
"""
+ if isinstance(data, (six.text_type, six.binary_type)):
+ return
+
chartype = data.dtype.char
- if chartype in ["U", "O"]:
- msg = "Type of the dataset '%s' is not supported. Found '%s'."
- raise TypeError(msg % (self.name, data.dtype))
+ if chartype == "U":
+ pass
+ elif chartype == "O":
+ d = h5py.special_dtype(vlen=data.dtype)
+ if d is not None:
+ return
+ d = h5py.special_dtype(ref=data.dtype)
+ if d is not None:
+ return
+ else:
+ return
+
+ msg = "Type of the dataset '%s' is not supported. Found '%s'."
+ raise TypeError(msg % (self.name, data.dtype))
def _set_data(self, data):
"""Set the data exposed by the dataset.
diff --git a/silx/io/fabioh5.py b/silx/io/fabioh5.py
index 3dfba96..2cee032 100644
--- a/silx/io/fabioh5.py
+++ b/silx/io/fabioh5.py
@@ -44,6 +44,12 @@ from . import commonh5
from silx.third_party import six
from silx import version as silx_version
+try:
+ import h5py
+except ImportError as e:
+ h5py = None
+
+
_logger = logging.getLogger(__name__)
@@ -71,6 +77,7 @@ class RawHeaderData(commonh5.LazyLoadableDataset):
"""Initialize hold data by merging all headers of each frames.
"""
headers = []
+ types = set([])
for frame in range(self.__fabio_file.nframes):
if self.__fabio_file.nframes == 1:
header = self.__fabio_file.header
@@ -81,10 +88,33 @@ class RawHeaderData(commonh5.LazyLoadableDataset):
for key, value in header.items():
data.append("%s: %s" % (str(key), str(value)))
- headers.append(u"\n".join(data).encode("utf-8"))
+ data = "\n".join(data)
+ try:
+ line = data.encode("ascii")
+ types.add(numpy.string_)
+ except UnicodeEncodeError:
+ try:
+ line = data.encode("utf-8")
+ types.add(numpy.unicode_)
+ except UnicodeEncodeError:
+ # Fall back to void
+ line = numpy.void(data)
+ types.add(numpy.void)
+
+ headers.append(line)
+
+ if numpy.void in types:
+ dtype = numpy.void
+ elif numpy.unicode_ in types:
+ dtype = numpy.unicode_
+ else:
+ dtype = numpy.string_
+
+ if dtype == numpy.unicode_ and h5py is not None:
+ # h5py only supports vlen unicode
+ dtype = h5py.special_dtype(vlen=six.text_type)
- # create the header list
- return numpy.array(headers, dtype=numpy.string_)
+ return numpy.array(headers, dtype=dtype)
class MetadataGroup(commonh5.LazyLoadableGroup):
@@ -214,6 +244,7 @@ class FabioReader(object):
self.__counters = {}
self.__positioners = {}
self.__measurements = {}
+ self.__key_filters = set([])
self.__data = None
self.__frame_count = self.__fabio_file.nframes
self._read(self.__fabio_file)
@@ -323,6 +354,13 @@ class FabioReader(object):
def _read(self, fabio_file):
"""Read all metadata from the fabio file and store it into this
object."""
+
+ self.__key_filters.clear()
+ if hasattr(fabio_file, "RESERVED_HEADER_KEYS"):
+ # Provided in fabio 0.5
+ for key in fabio_file.RESERVED_HEADER_KEYS:
+ self.__key_filters.add(key.lower())
+
for frame in range(fabio_file.nframes):
if fabio_file.nframes == 1:
header = fabio_file.header
@@ -330,10 +368,22 @@ class FabioReader(object):
header = fabio_file.getframe(frame).header
self._read_frame(frame, header)
+ def _is_filtered_key(self, key):
+ """
+ If this function returns True, :meth:`_read_key` will not be
+ called with this `key` while reading the metadata frame.
+
+ :param str key: A key of the metadata
+ :rtype: bool
+ """
+ return key.lower() in self.__key_filters
+
def _read_frame(self, frame_id, header):
"""Read all metadata from a frame and store it into this
object."""
for key, value in header.items():
+ if self._is_filtered_key(key):
+ continue
self._read_key(frame_id, key, value)
def _read_key(self, frame_id, name, value):
@@ -372,8 +422,10 @@ class FabioReader(object):
if has_none:
# Fix missing data according to the array type
- if result_type.kind in ["S", "U"]:
- none_value = ""
+ if result_type.kind == "S":
+ none_value = b""
+ elif result_type.kind == "U":
+ none_value = u""
elif result_type.kind == "f":
none_value = numpy.float("NaN")
elif result_type.kind == "i":
@@ -472,10 +524,10 @@ class FabioReader(object):
result_type = numpy.result_type(*types)
- if issubclass(result_type.type, numpy.string_):
+ if issubclass(result_type.type, (numpy.string_, six.binary_type)):
# use the raw data to create the result
return numpy.string_(value)
- elif issubclass(result_type.type, numpy.unicode_):
+ elif issubclass(result_type.type, (numpy.unicode_, six.text_type)):
# use the raw data to create the result
return numpy.unicode_(value)
else:
@@ -526,11 +578,10 @@ class EdfFabioReader(FabioReader):
self._read_mnemonic_key(frame_id, "counter", header)
FabioReader._read_frame(self, frame_id, header)
- def _read_key(self, frame_id, name, value):
- """Overwrite the method to filter counter or motor keys."""
- if name in self.__catch_keys:
- return
- FabioReader._read_key(self, frame_id, name, value)
+ def _is_filtered_key(self, key):
+ if key in self.__catch_keys:
+ return True
+ return FabioReader._is_filtered_key(self, key)
def _get_mnemonic_key(self, base_key, header):
mnemonic_values_key = base_key + "_mne"
diff --git a/silx/io/test/test_fabioh5.py b/silx/io/test/test_fabioh5.py
index 0237620..d9459ae 100644
--- a/silx/io/test/test_fabioh5.py
+++ b/silx/io/test/test_fabioh5.py
@@ -25,11 +25,15 @@
__authors__ = ["V. Valls"]
__license__ = "MIT"
-__date__ = "29/08/2017"
+__date__ = "04/10/2017"
+import os
+import sys
import logging
import numpy
import unittest
+import tempfile
+import shutil
_logger = logging.getLogger(__name__)
@@ -309,11 +313,104 @@ class TestFabioH5(unittest.TestCase):
self.assertIs(data1._get_data(), data2._get_data())
self.assertEqual(self.h5_image.get(data2.name, getlink=True).path, data1.name)
+ def test_dirty_header(self):
+ """Test that a header containing non-ASCII bytes does not fail"""
+ try:
+ header = {}
+ header["foo"] = b'abc'
+ data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8)
+ fabio_image = fabio.edfimage.edfimage(data=data, header=header)
+ header = {}
+ header["foo"] = b'a\x90bc\xFE'
+ fabio_image.appendFrame(data=data, header=header)
+ except Exception as e:
+ _logger.error(e.args[0])
+ _logger.debug("Backtrace", exc_info=True)
+ self.skipTest("fabio do not allow to create the resource")
+
+ h5_image = fabioh5.File(fabio_image=fabio_image)
+ scan_header_path = "/scan_0/instrument/file/scan_header"
+ self.assertIn(scan_header_path, h5_image)
+ data = h5_image[scan_header_path]
+ self.assertIsInstance(data[...], numpy.ndarray)
+
+ def test_unicode_header(self):
+ """Test that a header containing non-ASCII unicode does not fail"""
+ try:
+ header = {}
+ header["foo"] = b'abc'
+ data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8)
+ fabio_image = fabio.edfimage.edfimage(data=data, header=header)
+ header = {}
+ header["foo"] = u'abc\u2764'
+ fabio_image.appendFrame(data=data, header=header)
+ except Exception as e:
+ _logger.error(e.args[0])
+ _logger.debug("Backtrace", exc_info=True)
+ self.skipTest("fabio do not allow to create the resource")
+
+ h5_image = fabioh5.File(fabio_image=fabio_image)
+ scan_header_path = "/scan_0/instrument/file/scan_header"
+ self.assertIn(scan_header_path, h5_image)
+ data = h5_image[scan_header_path]
+ self.assertIsInstance(data[...], numpy.ndarray)
+
+
+class TestFabioH5WithEdf(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ if fabio is None:
+ raise unittest.SkipTest("fabio is needed")
+ if h5py is None:
+ raise unittest.SkipTest("h5py is needed")
+
+ cls.tmp_directory = tempfile.mkdtemp()
+
+ cls.edf_filename = os.path.join(cls.tmp_directory, "test.edf")
+
+ header = {
+ "integer": "-100",
+ "float": "1.0",
+ "string": "hi!",
+ "list_integer": "100 50 0",
+ "list_float": "1.0 2.0 3.5",
+ "string_looks_like_list": "2000 hi!",
+ }
+ data = numpy.array([[10, 11], [12, 13], [14, 15]], dtype=numpy.int64)
+ fabio_image = fabio.edfimage.edfimage(data, header)
+ fabio_image.write(cls.edf_filename)
+
+ cls.fabio_image = fabio.open(cls.edf_filename)
+ cls.h5_image = fabioh5.File(fabio_image=cls.fabio_image)
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.fabio_image = None
+ cls.h5_image = None
+ if sys.platform == "win32" and fabio is not None:
+ # gc collect is needed to close a file descriptor
+ # opened by fabio and not released.
+ # https://github.com/silx-kit/fabio/issues/167
+ import gc
+ gc.collect()
+ shutil.rmtree(cls.tmp_directory)
+
+ def test_reserved_format_metadata(self):
+ if fabio.hexversion < 327920: # 0.5.0 final
+ self.skipTest("fabio >= 0.5.0 final is needed")
+
+ # The EDF contains reserved keys in the header
+ self.assertIn("HeaderID", self.fabio_image.header)
+ # We do not expose them in FabioH5
+ self.assertNotIn("/scan_0/instrument/detector_0/others/HeaderID", self.h5_image)
+
def suite():
+ loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
test_suite = unittest.TestSuite()
- test_suite.addTest(
- unittest.defaultTestLoader.loadTestsFromTestCase(TestFabioH5))
+ test_suite.addTest(loadTests(TestFabioH5))
+ test_suite.addTest(loadTests(TestFabioH5WithEdf))
return test_suite