summaryrefslogtreecommitdiff
path: root/silx/io
diff options
context:
space:
mode:
authorPicca Frédéric-Emmanuel <picca@debian.org>2017-11-25 16:55:20 +0100
committerPicca Frédéric-Emmanuel <picca@debian.org>2017-11-25 16:55:20 +0100
commite19c96eff0c310c06c4f268c8b80cb33bd08996f (patch)
treef2b4a365ed899be04766f3937bcc2d58d22be065 /silx/io
parentbfa4dba15485b4192f8bbe13345e9658c97ecf76 (diff)
New upstream version 0.6.1+dfsg
Diffstat (limited to 'silx/io')
-rw-r--r--silx/io/commonh5.py25
-rw-r--r--silx/io/fabioh5.py75
-rw-r--r--silx/io/test/test_fabioh5.py103
3 files changed, 183 insertions, 20 deletions
diff --git a/silx/io/commonh5.py b/silx/io/commonh5.py
index 0f5ac02..02c4181 100644
--- a/silx/io/commonh5.py
+++ b/silx/io/commonh5.py
@@ -37,7 +37,7 @@ from .utils import is_dataset
__authors__ = ["V. Valls", "P. Knobel"]
__license__ = "MIT"
-__date__ = "21/09/2017"
+__date__ = "02/10/2017"
class _MappingProxyType(collections.MutableMapping):
@@ -190,14 +190,29 @@ class Dataset(Node):
def _check_data(self, data):
"""Check that the data provided by the dataset is valid.
- :param numpy.ndarray data: Data associated to the dataset
+    It is valid when it can be stored in an HDF5 file using h5py.
+ :param numpy.ndarray data: Data associated to the dataset
:raises TypeError: In the case the data is not valid.
"""
+ if isinstance(data, (six.text_type, six.binary_type)):
+ return
+
chartype = data.dtype.char
- if chartype in ["U", "O"]:
- msg = "Type of the dataset '%s' is not supported. Found '%s'."
- raise TypeError(msg % (self.name, data.dtype))
+ if chartype == "U":
+ pass
+ elif chartype == "O":
+ d = h5py.special_dtype(vlen=data.dtype)
+ if d is not None:
+ return
+ d = h5py.special_dtype(ref=data.dtype)
+ if d is not None:
+ return
+ else:
+ return
+
+ msg = "Type of the dataset '%s' is not supported. Found '%s'."
+ raise TypeError(msg % (self.name, data.dtype))
def _set_data(self, data):
"""Set the data exposed by the dataset.
diff --git a/silx/io/fabioh5.py b/silx/io/fabioh5.py
index 3dfba96..2cee032 100644
--- a/silx/io/fabioh5.py
+++ b/silx/io/fabioh5.py
@@ -44,6 +44,12 @@ from . import commonh5
from silx.third_party import six
from silx import version as silx_version
+try:
+ import h5py
+except ImportError as e:
+ h5py = None
+
+
_logger = logging.getLogger(__name__)
@@ -71,6 +77,7 @@ class RawHeaderData(commonh5.LazyLoadableDataset):
"""Initialize hold data by merging all headers of each frames.
"""
headers = []
+ types = set([])
for frame in range(self.__fabio_file.nframes):
if self.__fabio_file.nframes == 1:
header = self.__fabio_file.header
@@ -81,10 +88,33 @@ class RawHeaderData(commonh5.LazyLoadableDataset):
for key, value in header.items():
data.append("%s: %s" % (str(key), str(value)))
- headers.append(u"\n".join(data).encode("utf-8"))
+ data = "\n".join(data)
+ try:
+ line = data.encode("ascii")
+ types.add(numpy.string_)
+ except UnicodeEncodeError:
+ try:
+ line = data.encode("utf-8")
+ types.add(numpy.unicode_)
+ except UnicodeEncodeError:
+                    # Fallback to void
+ line = numpy.void(data)
+ types.add(numpy.void)
+
+ headers.append(line)
+
+ if numpy.void in types:
+ dtype = numpy.void
+ elif numpy.unicode_ in types:
+ dtype = numpy.unicode_
+ else:
+ dtype = numpy.string_
+
+ if dtype == numpy.unicode_ and h5py is not None:
+        # h5py only supports vlen unicode
+ dtype = h5py.special_dtype(vlen=six.text_type)
- # create the header list
- return numpy.array(headers, dtype=numpy.string_)
+ return numpy.array(headers, dtype=dtype)
class MetadataGroup(commonh5.LazyLoadableGroup):
@@ -214,6 +244,7 @@ class FabioReader(object):
self.__counters = {}
self.__positioners = {}
self.__measurements = {}
+ self.__key_filters = set([])
self.__data = None
self.__frame_count = self.__fabio_file.nframes
self._read(self.__fabio_file)
@@ -323,6 +354,13 @@ class FabioReader(object):
def _read(self, fabio_file):
"""Read all metadata from the fabio file and store it into this
object."""
+
+ self.__key_filters.clear()
+ if hasattr(fabio_file, "RESERVED_HEADER_KEYS"):
+ # Provided in fabio 0.5
+ for key in fabio_file.RESERVED_HEADER_KEYS:
+ self.__key_filters.add(key.lower())
+
for frame in range(fabio_file.nframes):
if fabio_file.nframes == 1:
header = fabio_file.header
@@ -330,10 +368,22 @@ class FabioReader(object):
header = fabio_file.getframe(frame).header
self._read_frame(frame, header)
+ def _is_filtered_key(self, key):
+ """
+        If this function returns True, the :meth:`_read_key` will not be
+        called with this `key` while reading the metadata frame.
+
+ :param str key: A key of the metadata
+ :rtype: bool
+ """
+ return key.lower() in self.__key_filters
+
def _read_frame(self, frame_id, header):
"""Read all metadata from a frame and store it into this
object."""
for key, value in header.items():
+ if self._is_filtered_key(key):
+ continue
self._read_key(frame_id, key, value)
def _read_key(self, frame_id, name, value):
@@ -372,8 +422,10 @@ class FabioReader(object):
if has_none:
# Fix missing data according to the array type
- if result_type.kind in ["S", "U"]:
- none_value = ""
+ if result_type.kind == "S":
+ none_value = b""
+ elif result_type.kind == "U":
+ none_value = u""
elif result_type.kind == "f":
none_value = numpy.float("NaN")
elif result_type.kind == "i":
@@ -472,10 +524,10 @@ class FabioReader(object):
result_type = numpy.result_type(*types)
- if issubclass(result_type.type, numpy.string_):
+ if issubclass(result_type.type, (numpy.string_, six.binary_type)):
# use the raw data to create the result
return numpy.string_(value)
- elif issubclass(result_type.type, numpy.unicode_):
+ elif issubclass(result_type.type, (numpy.unicode_, six.text_type)):
# use the raw data to create the result
return numpy.unicode_(value)
else:
@@ -526,11 +578,10 @@ class EdfFabioReader(FabioReader):
self._read_mnemonic_key(frame_id, "counter", header)
FabioReader._read_frame(self, frame_id, header)
- def _read_key(self, frame_id, name, value):
- """Overwrite the method to filter counter or motor keys."""
- if name in self.__catch_keys:
- return
- FabioReader._read_key(self, frame_id, name, value)
+ def _is_filtered_key(self, key):
+ if key in self.__catch_keys:
+ return True
+ return FabioReader._is_filtered_key(self, key)
def _get_mnemonic_key(self, base_key, header):
mnemonic_values_key = base_key + "_mne"
diff --git a/silx/io/test/test_fabioh5.py b/silx/io/test/test_fabioh5.py
index 0237620..d9459ae 100644
--- a/silx/io/test/test_fabioh5.py
+++ b/silx/io/test/test_fabioh5.py
@@ -25,11 +25,15 @@
__authors__ = ["V. Valls"]
__license__ = "MIT"
-__date__ = "29/08/2017"
+__date__ = "04/10/2017"
+import os
+import sys
import logging
import numpy
import unittest
+import tempfile
+import shutil
_logger = logging.getLogger(__name__)
@@ -309,11 +313,104 @@ class TestFabioH5(unittest.TestCase):
self.assertIs(data1._get_data(), data2._get_data())
self.assertEqual(self.h5_image.get(data2.name, getlink=True).path, data1.name)
+ def test_dirty_header(self):
+ """Test that it does not fail"""
+ try:
+ header = {}
+ header["foo"] = b'abc'
+ data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8)
+ fabio_image = fabio.edfimage.edfimage(data=data, header=header)
+ header = {}
+ header["foo"] = b'a\x90bc\xFE'
+ fabio_image.appendFrame(data=data, header=header)
+ except Exception as e:
+ _logger.error(e.args[0])
+ _logger.debug("Backtrace", exc_info=True)
+ self.skipTest("fabio do not allow to create the resource")
+
+ h5_image = fabioh5.File(fabio_image=fabio_image)
+ scan_header_path = "/scan_0/instrument/file/scan_header"
+ self.assertIn(scan_header_path, h5_image)
+ data = h5_image[scan_header_path]
+ self.assertIsInstance(data[...], numpy.ndarray)
+
+ def test_unicode_header(self):
+ """Test that it does not fail"""
+ try:
+ header = {}
+ header["foo"] = b'abc'
+ data = numpy.array([[0, 0], [0, 0]], dtype=numpy.int8)
+ fabio_image = fabio.edfimage.edfimage(data=data, header=header)
+ header = {}
+ header["foo"] = u'abc\u2764'
+ fabio_image.appendFrame(data=data, header=header)
+ except Exception as e:
+ _logger.error(e.args[0])
+ _logger.debug("Backtrace", exc_info=True)
+ self.skipTest("fabio do not allow to create the resource")
+
+ h5_image = fabioh5.File(fabio_image=fabio_image)
+ scan_header_path = "/scan_0/instrument/file/scan_header"
+ self.assertIn(scan_header_path, h5_image)
+ data = h5_image[scan_header_path]
+ self.assertIsInstance(data[...], numpy.ndarray)
+
+
+class TestFabioH5WithEdf(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ if fabio is None:
+ raise unittest.SkipTest("fabio is needed")
+ if h5py is None:
+ raise unittest.SkipTest("h5py is needed")
+
+ cls.tmp_directory = tempfile.mkdtemp()
+
+ cls.edf_filename = os.path.join(cls.tmp_directory, "test.edf")
+
+ header = {
+ "integer": "-100",
+ "float": "1.0",
+ "string": "hi!",
+ "list_integer": "100 50 0",
+ "list_float": "1.0 2.0 3.5",
+ "string_looks_like_list": "2000 hi!",
+ }
+ data = numpy.array([[10, 11], [12, 13], [14, 15]], dtype=numpy.int64)
+ fabio_image = fabio.edfimage.edfimage(data, header)
+ fabio_image.write(cls.edf_filename)
+
+ cls.fabio_image = fabio.open(cls.edf_filename)
+ cls.h5_image = fabioh5.File(fabio_image=cls.fabio_image)
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.fabio_image = None
+ cls.h5_image = None
+ if sys.platform == "win32" and fabio is not None:
+ # gc collect is needed to close a file descriptor
+ # opened by fabio and not released.
+ # https://github.com/silx-kit/fabio/issues/167
+ import gc
+ gc.collect()
+ shutil.rmtree(cls.tmp_directory)
+
+ def test_reserved_format_metadata(self):
+ if fabio.hexversion < 327920: # 0.5.0 final
+ self.skipTest("fabio >= 0.5.0 final is needed")
+
+ # The EDF contains reserved keys in the header
+ self.assertIn("HeaderID", self.fabio_image.header)
+ # We do not expose them in FabioH5
+ self.assertNotIn("/scan_0/instrument/detector_0/others/HeaderID", self.h5_image)
+
def suite():
+ loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
test_suite = unittest.TestSuite()
- test_suite.addTest(
- unittest.defaultTestLoader.loadTestsFromTestCase(TestFabioH5))
+ test_suite.addTest(loadTests(TestFabioH5))
+ test_suite.addTest(loadTests(TestFabioH5WithEdf))
return test_suite