1 file changed, 159 insertions, 143 deletions
diff --git a/PyMca5/PyMcaIO/ArraySave.py b/PyMca5/PyMcaIO/ArraySave.py
index 9989db6..8c4a7e1 100644
--- a/PyMca5/PyMcaIO/ArraySave.py
+++ b/PyMca5/PyMcaIO/ArraySave.py
@@ -2,7 +2,7 @@
 #
 # The PyMca X-Ray Fluorescence Toolkit
 #
-# Copyright (c) 2004-2014 European Synchrotron Radiation Facility
+# Copyright (c) 2004-2018 European Synchrotron Radiation Facility
 #
 # This file is part of the PyMca X-ray Fluorescence Toolkit developed at
 # the ESRF by the Software group.
@@ -33,29 +33,50 @@ __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
 import os
 import numpy
 import time
+import logging
+_logger = logging.getLogger(__name__)
+import sys
 
 try:
     from PyMca5.PyMcaIO import EdfFile
     from PyMca5.PyMcaIO import TiffIO
 except ImportError:
-    print("ArraySave.py is importing EdfFile and TiffIO from local directory")
+    _logger.info("ArraySave.py is importing EdfFile and TiffIO from local directory")
     import EdfFile
     import TiffIO
 
 HDF5 = True
 try:
     import h5py
+    if sys.version_info < (3, ):
+        text_dtype = h5py.special_dtype(vlen=unicode)
+    else:
+        text_dtype = h5py.special_dtype(vlen=str)
 except ImportError:
     HDF5 = False
 
 
-DEBUG = 0
+def to_unicode(s):
+    """Return ``s`` as a unicode string.
+
+    :param s: A bytestring (decoded as utf-8 text) or a unicode string
+        (returned unchanged, also on Python 2 where it has ``decode`` too)."""
+    if isinstance(s, (bytes, bytearray)):
+        return s.decode("utf-8")
+    return s
+
+
+def to_h5py_utf8(str_list):
+    """Return a string or a list of strings as a numpy array of dtype
+    ``text_dtype`` (h5py variable-length text; defined only if h5py imports).
+    """
+    return numpy.array(str_list, dtype=text_dtype)
 
 
 def getDate():
     localtime = time.localtime()
     gtime = time.gmtime()
-    #year, month, day, hour, minute, second,\
+    # year, month, day, hour, minute, second,\
     #      week_day, year_day, delta = time.localtime()
     year = localtime[0]
     month = localtime[1]
@@ -63,10 +84,10 @@ def getDate():
     hour = localtime[3]
     minute = localtime[4]
     second = localtime[5]
-    #get the difference against Greenwich
+    # get the difference against Greenwich
     delta = hour - gtime[3]
-    return "%4d-%02d-%02dT%02d:%02d:%02d%+02d:00" % (year, month, day, hour,
-                                                     minute, second, delta)
+    return u"%4d-%02d-%02dT%02d:%02d:%02d%+02d:00" % (year, month, day, hour,
+                                                      minute, second, delta)
 
 
 def save2DArrayListAsASCII(datalist, filename,
@@ -140,11 +161,11 @@ def save2DArrayListAsEDF(datalist, filename, labels=None, dtype=None):
     for i in range(ndata):
         if dtype is None:
             edfout.WriteImage({'Title': labels[i]},
-                                datalist[i], Append=1)
+                              datalist[i], Append=1)
         else:
             edfout.WriteImage({'Title': labels[i]},
-                               datalist[i].astype(dtype),
-                               Append=1)
+                              datalist[i].astype(dtype),
+                              Append=1)
     del edfout  # force file close
 
 
@@ -192,7 +213,7 @@ def save2DArrayListAsMonochromaticTiff(datalist, filename,
             try:
                 os.remove(fname)
             except OSError:
-                print("Cannot remove file %s" % fname)
+                _logger.warning("Cannot remove file %s", fname)
                 pass
         if (savedData == 0) or multifile:
             outfileInstance = TiffIO.TiffIO(fname, mode="wb+")
@@ -217,6 +238,7 @@ def save2DArrayListAsMonochromaticTiff(datalist, filename,
                 savedData += 1
         outfileInstance.close()  # force file close
 
+
 def openHDF5File(name, mode='a', **kwargs):
     """
     Open an HDF5 file.
@@ -235,37 +257,22 @@ def openHDF5File(name, mode='a', **kwargs):
     h5file = h5py.File(name, mode, **kwargs)
     if h5file.mode != 'r' and len(h5file) == 0:
         if 'file_name' not in h5file.attrs:
-            attr = 'file_name'
-            txt = "%s" % name
-            dtype = '<S%d' % len(txt)
-            h5file.attrs.create(attr, txt, dtype=dtype)
+            h5file.attrs.create('file_name', to_h5py_utf8(name))
         if 'file_time' not in h5file.attrs:
-            attr = 'file_time'
-            txt = "%s" % getDate()
-            dtype = '<S%d' % len(txt)
-            h5file.attrs.create(attr, txt, dtype=dtype)
+            h5file.attrs.create('file_time', to_h5py_utf8(getDate()))
         if 'HDF5_version' not in h5file.attrs:
-            attr = 'HDF5_version'
             txt = "%s" % h5py.version.hdf5_version
-            dtype = '<S%d' % len(txt)
-            h5file.attrs.create(attr, txt, dtype=dtype)
+            h5file.attrs.create('HDF5_version', to_h5py_utf8(txt))
         if 'HDF5_API_version' not in h5file.attrs:
-            attr = 'HDF5_API_version'
             txt = "%s" % h5py.version.api_version
-            dtype = '<S%d' % len(txt)
-            h5file.attrs.create(attr, txt, dtype=dtype)
+            h5file.attrs.create('HDF5_API_version', to_h5py_utf8(txt))
         if 'h5py_version' not in h5file.attrs:
-            attr = 'h5py_version'
             txt = "%s" % h5py.version.version
-            dtype = '<S%d' % len(txt)
-            h5file.attrs.create(attr, txt, dtype=dtype)
+            h5file.attrs.create('h5py_version', to_h5py_utf8(txt))
         if 'creator' not in h5file.attrs:
-            attr = 'creator'
-            txt = "%s" % 'PyMca'
-            dtype = '<S%d' % len(txt)
-            h5file.attrs.create(attr, txt, dtype=dtype)
-        #if 'format_version' not in self.attrs and len(h5file) == 0:
-        #    h5file.attrs['format_version'] = __format_version__
+            h5file.attrs.create('creator', to_h5py_utf8('PyMca'))
+        # if 'format_version' not in self.attrs and len(h5file) == 0:
+        #     h5file.attrs['format_version'] = __format_version__
 
     return h5file
 
@@ -286,24 +293,23 @@ def getHDF5FileInstanceAndBuffer(filename, shape,
     hdf = openHDF5File(filename, 'a')
     entryName = "data"
 
-    #entry
+    # entry
     nxEntry = hdf.require_group(entryName)
     if 'NX_class' not in nxEntry.attrs:
-        nxEntry.attrs['NX_class'] = 'NXentry'.encode('utf-8')
-    elif nxEntry.attrs['NX_class'] != 'NXentry'.encode('utf-8'):
-        #should I raise an error?
+        nxEntry.attrs['NX_class'] = u'NXentry'
+    elif nxEntry.attrs['NX_class'] not in [b'NXentry', u"NXentry"]:
+        # should I raise an error?
         pass
-    nxEntry['title'] = "PyMca saved 3D Array".encode('utf-8')
-    nxEntry['start_time'] = getDate().encode('utf-8')
+    nxEntry['title'] = u"PyMca saved 3D Array"
+    nxEntry['start_time'] = getDate()
     nxData = nxEntry.require_group('NXdata')
     if 'NX_class' not in nxData.attrs:
-        nxData.attrs['NX_class'] = 'NXdata'.encode('utf-8')
-    elif nxData.attrs['NX_class'] == 'NXdata'.encode('utf-8'):
-        #should I raise an error?
+        nxData.attrs['NX_class'] = u'NXdata'
+    elif nxData.attrs['NX_class'] in [b'NXdata', u'NXdata']:
+        # should I raise an error?
         pass
     if compression:
-        if DEBUG:
-            print("Saving compressed and chunked dataset")
+        _logger.debug("Saving compressed and chunked dataset")
         chunk1 = int(shape[1] / 10)
         if chunk1 == 0:
             chunk1 = shape[1]
@@ -319,30 +325,33 @@ def getHDF5FileInstanceAndBuffer(filename, shape,
                 chunk2 = int(shape[2] / i)
                 break
         data = nxData.require_dataset(buffername,
-                           shape=shape,
-                           dtype=dtype,
-                           chunks=(1, chunk1, chunk2),
-                           compression=compression)
+                                      shape=shape,
+                                      dtype=dtype,
+                                      chunks=(1, chunk1, chunk2),
+                                      compression=compression)
     else:
         #no chunking
-        if DEBUG:
-            print("Saving not compressed and not chunked dataset")
+        _logger.debug("Saving not compressed and not chunked dataset")
         data = nxData.require_dataset(buffername,
-                           shape=shape,
-                           dtype=dtype,
-                           compression=None)
-    data.attrs['signal'] = numpy.int32(1)
+                                      shape=shape,
+                                      dtype=dtype,
+                                      compression=None)
+    nxData.attrs['signal'] = to_unicode(buffername)
     if interpretation is not None:
-        data.attrs['interpretation'] = interpretation.encode('utf-8')
+        data.attrs['interpretation'] = to_unicode(interpretation)
+
     for i in range(len(shape)):
         dim = numpy.arange(shape[i]).astype(numpy.float32)
         dset = nxData.require_dataset('dim_%d' % i,
-                               dim.shape,
-                               dim.dtype,
-                               dim,
-                               chunks=dim.shape)
-        dset.attrs['axis'] = numpy.int32(i + 1)
-    nxEntry['end_time'] = getDate().encode('utf-8')
+                                      dim.shape,
+                                      dim.dtype,
+                                      dim,
+                                      chunks=dim.shape)
+
+    nxData.attrs["axes"] = to_h5py_utf8(['dim_%d' % i
+                                         for i in range(len(shape))])
+
+    nxEntry['end_time'] = getDate()
     return hdf, data
 
 
@@ -374,6 +383,7 @@ def save3DArrayAsMonochromaticTiff(data, filename,
             outfileInstance.writeImage(tmpData, info={'Title': labels[i]})
             if (ndata > 10):
                 print("Saved image %d of %d" % (i + 1, ndata))
+                _logger.info("Saved image %d of %d", i + 1, ndata)
     elif mcaindex == 1:
         for i in range(ndata):
             if i == 1:
@@ -384,6 +394,7 @@ def save3DArrayAsMonochromaticTiff(data, filename,
                 tmpData = data[:, i, :].astype(dtype)
             outfileInstance.writeImage(tmpData, info={'Title': labels[i]})
             if (ndata > 10):
+                _logger.info("Saved image %d of %d", i + 1, ndata)
                 print("Saved image %d of %d" % (i + 1, ndata))
     else:
         for i in range(ndata):
@@ -395,20 +406,23 @@ def save3DArrayAsMonochromaticTiff(data, filename,
                 tmpData = data[i].astype(dtype)
             outfileInstance.writeImage(tmpData, info={'Title': labels[i]})
             if (ndata > 10):
+                _logger.info("Saved image %d of %d",
+                             i + 1, ndata)
                 print("Saved image %d of %d" % (i + 1, ndata))
     outfileInstance.close()  # force file close
 
+
 # it should be used to name the data that for the time being is named 'data'.
 def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='nexus',
                       mcaindex=-1, interpretation=None, compression=None):
     if not HDF5:
         raise IOError('h5py does not seem to be installed in your system')
     if (mcaindex == 0) and (interpretation in ["spectrum", None]):
-        #stack of images to be saved as stack of spectra
+        # stack of images to be saved as stack of spectra
         modify = True
         shape = [data.shape[1], data.shape[2], data.shape[0]]
     elif (mcaindex != 0) and (interpretation in ["image"]):
-        #stack of spectra to be saved as stack of images
+        # stack of spectra to be saved as stack of images
         modify = True
         shape = [data.shape[2], data.shape[0], data.shape[1]]
     else:
@@ -417,7 +431,7 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
     if dtype is None:
         dtype = data.dtype
     if mode.lower() in ['nexus', 'nexus+']:
-        #raise IOError, 'NeXus data saving not implemented yet'
+        # raise IOError, 'NeXus data saving not implemented yet'
         if os.path.exists(filename):
             try:
                 os.remove(filename)
@@ -425,27 +439,26 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
                 raise IOError("Cannot overwrite existing file!")
         hdf = openHDF5File(filename, 'a')
         entryName = "data"
-        #entry
+        # entry
         nxEntry = hdf.require_group(entryName)
         if 'NX_class' not in nxEntry.attrs:
-            nxEntry.attrs['NX_class'] = 'NXentry'.encode('utf-8')
-        elif nxEntry.attrs['NX_class'] != 'NXentry'.encode('utf-8'):
-            #should I raise an error?
+            nxEntry.attrs['NX_class'] = u'NXentry'
+        elif nxEntry.attrs['NX_class'] not in [b'NXentry', u'NXentry']:
+            # should I raise an error?
             pass
 
-        nxEntry['title'] = numpy.string_("PyMca saved 3D Array".encode('utf-8'))
-        nxEntry['start_time'] = numpy.string_(getDate().encode('utf-8'))
+        nxEntry['title'] = u"PyMca saved 3D Array"
+        nxEntry['start_time'] = getDate()
         nxData = nxEntry.require_group('NXdata')
-        if ('NX_class' not in nxData.attrs):
-            nxData.attrs['NX_class'] = 'NXdata'.encode('utf-8')
-        elif nxData.attrs['NX_class'] != 'NXdata'.encode('utf-8'):
-            #should I raise an error?
+        if 'NX_class' not in nxData.attrs:
+            nxData.attrs['NX_class'] = u'NXdata'
+        elif nxData.attrs['NX_class'] not in [u'NXdata', b'NXdata']:
+            # should I raise an error?
             pass
         if modify:
-            if interpretation in ["image", "image".encode('utf-8')]:
+            if interpretation in [b"image", u"image"]:
                 if compression:
-                    if DEBUG:
-                        print("Saving compressed and chunked dataset")
+                    _logger.debug("Saving compressed and chunked dataset")
                     #risk of taking a 10 % more space in disk
                     chunk1 = int(shape[1] / 10)
                     if chunk1 == 0:
@@ -460,31 +473,31 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
                             chunk2 = int(shape[2] / i)
                             break
                     dset = nxData.require_dataset('data',
-                                       shape=shape,
-                                       dtype=dtype,
-                                       chunks=(1, chunk1, chunk2),
-                                       compression=compression)
+                                                  shape=shape,
+                                                  dtype=dtype,
+                                                  chunks=(1, chunk1, chunk2),
+                                                  compression=compression)
                 else:
-                    if DEBUG:
-                        print("Saving not compressed and not chunked dataset")
+                    _logger.debug("Saving not compressed and not chunked dataset")
                     #print not compressed -> Not chunked
                     dset = nxData.require_dataset('data',
-                                       shape=shape,
-                                       dtype=dtype,
-                                       compression=None)
+                                                  shape=shape,
+                                                  dtype=dtype,
+                                                  compression=None)
                 for i in range(data.shape[-1]):
                     tmp = data[:, :, i:i + 1]
                     tmp.shape = 1, shape[1], shape[2]
                     dset[i, 0:shape[1], :] = tmp
-                    print("Saved item %d of %d" % (i + 1, data.shape[-1]))
+                    _logger.info("Saved item %d of %d",
+                                 i + 1, data.shape[-1])
             elif 0:
-                #if I do not match the input and output shapes it takes ages
-                #to save the images as spectra. However, it is much faster
-                #when performing spectra operations.
+                # if I do not match the input and output shapes it takes ages
+                # to save the images as spectra. However, it is much faster
+                # when performing spectra operations.
                 dset = nxData.require_dataset('data',
-                               shape=shape,
-                               dtype=dtype,
-                               chunks=(1, shape[1], shape[2]))
+                                              shape=shape,
+                                              dtype=dtype,
+                                              chunks=(1, shape[1], shape[2]))
                 for i in range(data.shape[1]):  # shape[0]
                     chunk = numpy.zeros((1, data.shape[2], data.shape[0]),
                                         dtype)
@@ -498,35 +511,33 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
                             tmpData = data[k:k + 1, i, :]
                             tmpData.shape = -1
                             chunk[0, :, k] = tmpData
-                    print("Saving item %d of %d" % (i, data.shape[1]))
+                    _logger.info("Saving item %d of %d",
+                                 i, data.shape[1])
                     dset[i, :, :] = chunk
             else:
-                #if I do not match the input and output shapes it takes ages
-                #to save the images as spectra. This is a very fast saving, but
-                #the performance is awful when reading.
+                # if I do not match the input and output shapes it takes ages
+                # to save the images as spectra. This is a very fast saving, but
+                # the performance is awful when reading.
                 if compression:
-                    if DEBUG:
-                        print("Saving compressed and chunked dataset")
+                    _logger.debug("Saving compressed and chunked dataset")
                     dset = nxData.require_dataset('data',
                                shape=shape,
                                dtype=dtype,
                                chunks=(shape[0], shape[1], 1),
                                compression=compression)
                 else:
-                    if DEBUG:
-                        print("Saving not compressed and not chunked dataset")
+                    _logger.debug("Saving not compressed and not chunked dataset")
                     dset = nxData.require_dataset('data',
-                               shape=shape,
-                               dtype=dtype,
-                               compression=None)
+                                                  shape=shape,
+                                                  dtype=dtype,
+                                                  compression=None)
                 for i in range(data.shape[0]):
                     tmp = data[i:i + 1, :, :]
                     tmp.shape = shape[0], shape[1], 1
                     dset[:, :, i:i + 1] = tmp
         else:
             if compression:
-                if DEBUG:
-                    print("Saving compressed and chunked dataset")
+                _logger.debug("Saving compressed and chunked dataset")
                 chunk1 = int(shape[1] / 10)
                 if chunk1 == 0:
                     chunk1 = shape[1]
@@ -541,30 +552,31 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
                     if (shape[2] % i) == 0:
                         chunk2 = int(shape[2] / i)
                         break
-                if DEBUG:
-                    print("Used chunk size = (1, %d, %d)" % (chunk1, chunk2))
+                _logger.debug("Used chunk size = (1, %d, %d)",
+                              chunk1, chunk2)
                 dset = nxData.require_dataset('data',
-                               shape=shape,
-                               dtype=dtype,
-                               chunks=(1, chunk1, chunk2),
-                               compression=compression)
+                                              shape=shape,
+                                              dtype=dtype,
+                                              chunks=(1, chunk1, chunk2),
+                                              compression=compression)
             else:
-                if DEBUG:
-                    print("Saving not compressed and notchunked dataset")
+                _logger.debug("Saving not compressed and notchunked dataset")
                 dset = nxData.require_dataset('data',
-                               shape=shape,
-                               dtype=dtype,
-                               compression=None)
+                                              shape=shape,
+                                              dtype=dtype,
+                                              compression=None)
             tmpData = numpy.zeros((1, data.shape[1], data.shape[2]),
                                   data.dtype)
             for i in range(data.shape[0]):
                 tmpData[0:1] = data[i:i + 1]
                 dset[i:i + 1] = tmpData[0:1]
-                print("Saved item %d of %d" % (i + 1, data.shape[0]))
+                _logger.info("Saved item %d of %d", i + 1, data.shape[0])
+
+        nxData.attrs["signal"] = u'data'
 
-        dset.attrs['signal'] = "1".encode('utf-8')
         if interpretation is not None:
-            dset.attrs['interpretation'] = interpretation.encode('utf-8')
+            dset.attrs['interpretation'] = to_unicode(interpretation)
+
         axesAttribute = []
         for i in range(len(shape)):
             if axes is None:
@@ -584,18 +596,20 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
                 dimlabel = 'dim_%d' % i
             axesAttribute.append(dimlabel)
             adset = nxData.require_dataset(dimlabel,
-                                   dim.shape,
-                                   dim.dtype,
-                                   compression=None)
+                                           dim.shape,
+                                           dim.dtype,
+                                           compression=None)
             adset[:] = dim[:]
             adset.attrs['axis'] = i + 1
-        dset.attrs['axes'] = (":".join(axesAttribute)).encode('utf-8')
-        nxEntry['end_time'] = numpy.string_(getDate().encode('utf-8'))
+
+        nxData.attrs["axes"] = to_h5py_utf8([axAttr for axAttr in axesAttribute])
+
+        nxEntry['end_time'] = getDate()
         if mode.lower() == 'nexus+':
-            #create link
-            g = h5py.h5g.open(hdf.fid, '/'.encode('utf-8'))
-            g.link('/data/NXdata/data'.encode('utf-8'),
-                   '/data/data'.encode('utf-8'),
+            # create link
+            g = h5py.h5g.open(hdf.fid, '/')
+            g.link('/data/NXdata/data',
+                   '/data/data',
                    h5py.h5g.LINK_HARD)
 
     elif mode.lower() == 'simplest':
@@ -607,17 +621,17 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
         hdf = h5py.File(filename, 'a')
         if compression:
             hdf.require_dataset('data',
-                           shape=shape,
-                           dtype=dtype,
-                           data=data,
-                           chunks=(1, shape[1], shape[2]),
-                           compression=compression)
+                                shape=shape,
+                                dtype=dtype,
+                                data=data,
+                                chunks=(1, shape[1], shape[2]),
+                                compression=compression)
         else:
             hdf.require_dataset('data',
-                           shape=shape,
-                           data=data,
-                           dtype=dtype,
-                           compression=None)
+                                shape=shape,
+                                data=data,
+                                dtype=dtype,
+                                compression=None)
     else:
         if os.path.exists(filename):
             try:
@@ -629,13 +643,14 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='
         hdf = h5py.File(filename, 'a')
         dataGroup = hdf.require_group('data')
         dataGroup.require_dataset('data',
-                           shape=shape,
-                           dtype=dtype,
-                           data=data,
-                           chunks=(1, shape[1], shape[2]))
+                                  shape=shape,
+                                  dtype=dtype,
+                                  data=data,
+                                  chunks=(1, shape[1], shape[2]))
     hdf.flush()
     hdf.close()
 
+
 def main():
     a = numpy.arange(1000000.)
     a.shape = 20, 50, 1000
@@ -643,6 +658,7 @@ def main():
     getHDF5FileInstanceAndBuffer('/test2.h5', (100, 100, 100))
     print("Date String = ", getDate())
 
+
 if __name__ == "__main__":
     main()