diff options
Diffstat (limited to 'PyMca5/PyMcaIO/ArraySave.py')
-rw-r--r-- | PyMca5/PyMcaIO/ArraySave.py | 302 |
1 files changed, 159 insertions, 143 deletions
diff --git a/PyMca5/PyMcaIO/ArraySave.py b/PyMca5/PyMcaIO/ArraySave.py index 9989db6..8c4a7e1 100644 --- a/PyMca5/PyMcaIO/ArraySave.py +++ b/PyMca5/PyMcaIO/ArraySave.py @@ -2,7 +2,7 @@ # # The PyMca X-Ray Fluorescence Toolkit # -# Copyright (c) 2004-2014 European Synchrotron Radiation Facility +# Copyright (c) 2004-2018 European Synchrotron Radiation Facility # # This file is part of the PyMca X-ray Fluorescence Toolkit developed at # the ESRF by the Software group. @@ -33,29 +33,50 @@ __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" import os import numpy import time +import logging +_logger = logging.getLogger(__name__) +import sys try: from PyMca5.PyMcaIO import EdfFile from PyMca5.PyMcaIO import TiffIO except ImportError: - print("ArraySave.py is importing EdfFile and TiffIO from local directory") + _logger.info("ArraySave.py is importing EdfFile and TiffIO from local directory") import EdfFile import TiffIO HDF5 = True try: import h5py + if sys.version_info < (3, ): + text_dtype = h5py.special_dtype(vlen=unicode) + else: + text_dtype = h5py.special_dtype(vlen=str) except ImportError: HDF5 = False -DEBUG = 0 +def to_unicode(s): + """Return string as unicode. + + :param s: A string (bytestring or unicode string). + If s is a bytestring, it is assumed that it is utf-8 encoded text""" + if hasattr(s, "decode"): + return s.decode("utf-8") + return s + + +def to_h5py_utf8(str_list): + """Convert a string or a list of strings to a variable length utf-8 string + compatible with h5py. + """ + return numpy.array(str_list, dtype=text_dtype) def getDate(): localtime = time.localtime() gtime = time.gmtime() - #year, month, day, hour, minute, second,\ + # year, month, day, hour, minute, second,\ # week_day, year_day, delta = time.localtime() year = localtime[0] month = localtime[1] @@ -63,10 +84,10 @@ def getDate(): hour = localtime[3] minute = localtime[4] second = localtime[5] - #get the difference against Greenwich + # get the difference against Greenwich delta = hour - gtime[3] - return "%4d-%02d-%02dT%02d:%02d:%02d%+02d:00" % (year, month, day, hour, - minute, second, delta) + return u"%4d-%02d-%02dT%02d:%02d:%02d%+02d:00" % (year, month, day, hour, + minute, second, delta) def save2DArrayListAsASCII(datalist, filename, @@ -140,11 +161,11 @@ def save2DArrayListAsEDF(datalist, filename, labels=None, dtype=None): for i in range(ndata): if dtype is None: edfout.WriteImage({'Title': labels[i]}, - datalist[i], Append=1) + datalist[i], Append=1) else: edfout.WriteImage({'Title': labels[i]}, - datalist[i].astype(dtype), - Append=1) + datalist[i].astype(dtype), + Append=1) del edfout # force file close @@ -192,7 +213,7 @@ def save2DArrayListAsMonochromaticTiff(datalist, filename, try: os.remove(fname) except OSError: - print("Cannot remove file %s" % fname) + _logger.warning("Cannot remove file %s", fname) pass if (savedData == 0) or multifile: outfileInstance = TiffIO.TiffIO(fname, mode="wb+") @@ -217,6 +238,7 @@ def save2DArrayListAsMonochromaticTiff(datalist, filename, savedData += 1 outfileInstance.close() # force file close + def openHDF5File(name, mode='a', **kwargs): """ Open an HDF5 file. @@ -235,37 +257,22 @@ def openHDF5File(name, mode='a', **kwargs): h5file = h5py.File(name, mode, **kwargs) if h5file.mode != 'r' and len(h5file) == 0: if 'file_name' not in h5file.attrs: - attr = 'file_name' - txt = "%s" % name - dtype = '<S%d' % len(txt) - h5file.attrs.create(attr, txt, dtype=dtype) + h5file.attrs.create('file_name', to_h5py_utf8(name)) if 'file_time' not in h5file.attrs: - attr = 'file_time' - txt = "%s" % getDate() - dtype = '<S%d' % len(txt) - h5file.attrs.create(attr, txt, dtype=dtype) + h5file.attrs.create('file_time', to_h5py_utf8(getDate())) if 'HDF5_version' not in h5file.attrs: - attr = 'HDF5_version' txt = "%s" % h5py.version.hdf5_version - dtype = '<S%d' % len(txt) - h5file.attrs.create(attr, txt, dtype=dtype) + h5file.attrs.create('HDF5_version', to_h5py_utf8(txt)) if 'HDF5_API_version' not in h5file.attrs: - attr = 'HDF5_API_version' txt = "%s" % h5py.version.api_version - dtype = '<S%d' % len(txt) - h5file.attrs.create(attr, txt, dtype=dtype) + h5file.attrs.create('HDF5_API_version', to_h5py_utf8(txt)) if 'h5py_version' not in h5file.attrs: - attr = 'h5py_version' txt = "%s" % h5py.version.version - dtype = '<S%d' % len(txt) - h5file.attrs.create(attr, txt, dtype=dtype) + h5file.attrs.create('h5py_version', to_h5py_utf8(txt)) if 'creator' not in h5file.attrs: - attr = 'creator' - txt = "%s" % 'PyMca' - dtype = '<S%d' % len(txt) - h5file.attrs.create(attr, txt, dtype=dtype) - #if 'format_version' not in self.attrs and len(h5file) == 0: - # h5file.attrs['format_version'] = __format_version__ + h5file.attrs.create('creator', to_h5py_utf8('PyMca')) + # if 'format_version' not in self.attrs and len(h5file) == 0: + # h5file.attrs['format_version'] = __format_version__ return h5file @@ -286,24 +293,23 @@ def getHDF5FileInstanceAndBuffer(filename, shape, hdf = openHDF5File(filename, 'a') entryName = "data" - #entry + # entry nxEntry = hdf.require_group(entryName) if 'NX_class' not in nxEntry.attrs: - nxEntry.attrs['NX_class'] = 'NXentry'.encode('utf-8') - elif nxEntry.attrs['NX_class'] != 'NXentry'.encode('utf-8'): - #should I raise an error? + nxEntry.attrs['NX_class'] = u'NXentry' + elif nxEntry.attrs['NX_class'] not in [b'NXentry', u"NXentry"]: + # should I raise an error? pass - nxEntry['title'] = "PyMca saved 3D Array".encode('utf-8') - nxEntry['start_time'] = getDate().encode('utf-8') + nxEntry['title'] = u"PyMca saved 3D Array" + nxEntry['start_time'] = getDate() nxData = nxEntry.require_group('NXdata') if 'NX_class' not in nxData.attrs: - nxData.attrs['NX_class'] = 'NXdata'.encode('utf-8') - elif nxData.attrs['NX_class'] == 'NXdata'.encode('utf-8'): - #should I raise an error? + nxData.attrs['NX_class'] = u'NXdata' + elif nxData.attrs['NX_class'] in [b'NXdata', u'NXdata']: + # should I raise an error? pass if compression: - if DEBUG: - print("Saving compressed and chunked dataset") + _logger.debug("Saving compressed and chunked dataset") chunk1 = int(shape[1] / 10) if chunk1 == 0: chunk1 = shape[1] @@ -319,30 +325,33 @@ def getHDF5FileInstanceAndBuffer(filename, shape, chunk2 = int(shape[2] / i) break data = nxData.require_dataset(buffername, - shape=shape, - dtype=dtype, - chunks=(1, chunk1, chunk2), - compression=compression) + shape=shape, + dtype=dtype, + chunks=(1, chunk1, chunk2), + compression=compression) else: #no chunking - if DEBUG: - print("Saving not compressed and not chunked dataset") + _logger.debug("Saving not compressed and not chunked dataset") data = nxData.require_dataset(buffername, - shape=shape, - dtype=dtype, - compression=None) - data.attrs['signal'] = numpy.int32(1) + shape=shape, + dtype=dtype, + compression=None) + nxData.attrs['signal'] = to_unicode(buffername) if interpretation is not None: - data.attrs['interpretation'] = interpretation.encode('utf-8') + data.attrs['interpretation'] = to_unicode(interpretation) + for i in range(len(shape)): dim = numpy.arange(shape[i]).astype(numpy.float32) dset = nxData.require_dataset('dim_%d' % i, - dim.shape, - dim.dtype, - dim, - chunks=dim.shape) - dset.attrs['axis'] = numpy.int32(i + 1) - nxEntry['end_time'] = getDate().encode('utf-8') + dim.shape, + dim.dtype, + dim, + chunks=dim.shape) + + nxData.attrs["axes"] = to_h5py_utf8(['dim_%d' % i + for i in range(len(shape))]) + + nxEntry['end_time'] = getDate() return hdf, data @@ -374,6 +383,7 @@ def save3DArrayAsMonochromaticTiff(data, filename, outfileInstance.writeImage(tmpData, info={'Title': labels[i]}) if (ndata > 10): print("Saved image %d of %d" % (i + 1, ndata)) + _logger.info("Saved image %d of %d", i + 1, ndata) elif mcaindex == 1: for i in range(ndata): if i == 1: @@ -384,6 +394,7 @@ def save3DArrayAsMonochromaticTiff(data, filename, tmpData = data[:, i, :].astype(dtype) outfileInstance.writeImage(tmpData, info={'Title': labels[i]}) if (ndata > 10): + _logger.info("Saved image %d of %d", i + 1, ndata) print("Saved image %d of %d" % (i + 1, ndata)) else: for i in range(ndata): @@ -395,20 +406,23 @@ def save3DArrayAsMonochromaticTiff(data, filename, tmpData = data[i].astype(dtype) outfileInstance.writeImage(tmpData, info={'Title': labels[i]}) if (ndata > 10): + _logger.info("Saved image %d of %d", + i + 1, ndata) print("Saved image %d of %d" % (i + 1, ndata)) outfileInstance.close() # force file close + # it should be used to name the data that for the time being is named 'data'. def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode='nexus', mcaindex=-1, interpretation=None, compression=None): if not HDF5: raise IOError('h5py does not seem to be installed in your system') if (mcaindex == 0) and (interpretation in ["spectrum", None]): - #stack of images to be saved as stack of spectra + # stack of images to be saved as stack of spectra modify = True shape = [data.shape[1], data.shape[2], data.shape[0]] elif (mcaindex != 0) and (interpretation in ["image"]): - #stack of spectra to be saved as stack of images + # stack of spectra to be saved as stack of images modify = True shape = [data.shape[2], data.shape[0], data.shape[1]] else: @@ -417,7 +431,7 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' if dtype is None: dtype = data.dtype if mode.lower() in ['nexus', 'nexus+']: - #raise IOError, 'NeXus data saving not implemented yet' + # raise IOError, 'NeXus data saving not implemented yet' if os.path.exists(filename): try: os.remove(filename) @@ -425,27 +439,26 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' raise IOError("Cannot overwrite existing file!") hdf = openHDF5File(filename, 'a') entryName = "data" - #entry + # entry nxEntry = hdf.require_group(entryName) if 'NX_class' not in nxEntry.attrs: - nxEntry.attrs['NX_class'] = 'NXentry'.encode('utf-8') - elif nxEntry.attrs['NX_class'] != 'NXentry'.encode('utf-8'): - #should I raise an error? + nxEntry.attrs['NX_class'] = u'NXentry' + elif nxEntry.attrs['NX_class'] not in [b'NXentry', u'NXentry']: + # should I raise an error? pass - nxEntry['title'] = numpy.string_("PyMca saved 3D Array".encode('utf-8')) - nxEntry['start_time'] = numpy.string_(getDate().encode('utf-8')) + nxEntry['title'] = u"PyMca saved 3D Array" + nxEntry['start_time'] = getDate() nxData = nxEntry.require_group('NXdata') - if ('NX_class' not in nxData.attrs): - nxData.attrs['NX_class'] = 'NXdata'.encode('utf-8') - elif nxData.attrs['NX_class'] != 'NXdata'.encode('utf-8'): - #should I raise an error? + if 'NX_class' not in nxData.attrs: + nxData.attrs['NX_class'] = u'NXdata' + elif nxData.attrs['NX_class'] not in [u'NXdata', b'NXdata']: + # should I raise an error? pass if modify: - if interpretation in ["image", "image".encode('utf-8')]: + if interpretation in [b"image", u"image"]: if compression: - if DEBUG: - print("Saving compressed and chunked dataset") + _logger.debug("Saving compressed and chunked dataset") #risk of taking a 10 % more space in disk chunk1 = int(shape[1] / 10) if chunk1 == 0: @@ -460,31 +473,31 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' chunk2 = int(shape[2] / i) break dset = nxData.require_dataset('data', - shape=shape, - dtype=dtype, - chunks=(1, chunk1, chunk2), - compression=compression) + shape=shape, + dtype=dtype, + chunks=(1, chunk1, chunk2), + compression=compression) else: - if DEBUG: - print("Saving not compressed and not chunked dataset") + _logger.debug("Saving not compressed and not chunked dataset") #print not compressed -> Not chunked dset = nxData.require_dataset('data', - shape=shape, - dtype=dtype, - compression=None) + shape=shape, + dtype=dtype, + compression=None) for i in range(data.shape[-1]): tmp = data[:, :, i:i + 1] tmp.shape = 1, shape[1], shape[2] dset[i, 0:shape[1], :] = tmp - print("Saved item %d of %d" % (i + 1, data.shape[-1])) + _logger.info("Saved item %d of %d", + i + 1, data.shape[-1]) elif 0: - #if I do not match the input and output shapes it takes ages - #to save the images as spectra. However, it is much faster - #when performing spectra operations. + # if I do not match the input and output shapes it takes ages + # to save the images as spectra. However, it is much faster + # when performing spectra operations. dset = nxData.require_dataset('data', - shape=shape, - dtype=dtype, - chunks=(1, shape[1], shape[2])) + shape=shape, + dtype=dtype, + chunks=(1, shape[1], shape[2])) for i in range(data.shape[1]): # shape[0] chunk = numpy.zeros((1, data.shape[2], data.shape[0]), dtype) @@ -498,35 +511,33 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' tmpData = data[k:k + 1, i, :] tmpData.shape = -1 chunk[0, :, k] = tmpData - print("Saving item %d of %d" % (i, data.shape[1])) + _logger.info("Saving item %d of %d", + i, data.shape[1]) dset[i, :, :] = chunk else: - #if I do not match the input and output shapes it takes ages - #to save the images as spectra. This is a very fast saving, but - #the performance is awful when reading. + # if I do not match the input and output shapes it takes ages + # to save the images as spectra. This is a very fast saving, but + # the performance is awful when reading. if compression: - if DEBUG: - print("Saving compressed and chunked dataset") + _logger.debug("Saving compressed and chunked dataset") dset = nxData.require_dataset('data', shape=shape, dtype=dtype, chunks=(shape[0], shape[1], 1), compression=compression) else: - if DEBUG: - print("Saving not compressed and not chunked dataset") + _logger.debug("Saving not compressed and not chunked dataset") dset = nxData.require_dataset('data', - shape=shape, - dtype=dtype, - compression=None) + shape=shape, + dtype=dtype, + compression=None) for i in range(data.shape[0]): tmp = data[i:i + 1, :, :] tmp.shape = shape[0], shape[1], 1 dset[:, :, i:i + 1] = tmp else: if compression: - if DEBUG: - print("Saving compressed and chunked dataset") + _logger.debug("Saving compressed and chunked dataset") chunk1 = int(shape[1] / 10) if chunk1 == 0: chunk1 = shape[1] @@ -541,30 +552,31 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' if (shape[2] % i) == 0: chunk2 = int(shape[2] / i) break - if DEBUG: - print("Used chunk size = (1, %d, %d)" % (chunk1, chunk2)) + _logger.debug("Used chunk size = (1, %d, %d)", + chunk1, chunk2) dset = nxData.require_dataset('data', - shape=shape, - dtype=dtype, - chunks=(1, chunk1, chunk2), - compression=compression) + shape=shape, + dtype=dtype, + chunks=(1, chunk1, chunk2), + compression=compression) else: - if DEBUG: - print("Saving not compressed and notchunked dataset") + _logger.debug("Saving not compressed and notchunked dataset") dset = nxData.require_dataset('data', - shape=shape, - dtype=dtype, - compression=None) + shape=shape, + dtype=dtype, + compression=None) tmpData = numpy.zeros((1, data.shape[1], data.shape[2]), data.dtype) for i in range(data.shape[0]): tmpData[0:1] = data[i:i + 1] dset[i:i + 1] = tmpData[0:1] - print("Saved item %d of %d" % (i + 1, data.shape[0])) + _logger.info("Saved item %d of %d", i + 1, data.shape[0]) + + nxData.attrs["signal"] = u'data' - dset.attrs['signal'] = "1".encode('utf-8') if interpretation is not None: - dset.attrs['interpretation'] = interpretation.encode('utf-8') + dset.attrs['interpretation'] = to_unicode(interpretation) + axesAttribute = [] for i in range(len(shape)): if axes is None: @@ -584,18 +596,20 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' dimlabel = 'dim_%d' % i axesAttribute.append(dimlabel) adset = nxData.require_dataset(dimlabel, - dim.shape, - dim.dtype, - compression=None) + dim.shape, + dim.dtype, + compression=None) adset[:] = dim[:] adset.attrs['axis'] = i + 1 - dset.attrs['axes'] = (":".join(axesAttribute)).encode('utf-8') - nxEntry['end_time'] = numpy.string_(getDate().encode('utf-8')) + + nxData.attrs["axes"] = to_h5py_utf8([axAttr for axAttr in axesAttribute]) + + nxEntry['end_time'] = getDate() if mode.lower() == 'nexus+': - #create link - g = h5py.h5g.open(hdf.fid, '/'.encode('utf-8')) - g.link('/data/NXdata/data'.encode('utf-8'), - '/data/data'.encode('utf-8'), + # create link + g = h5py.h5g.open(hdf.fid, '/') + g.link('/data/NXdata/data', + '/data/data', h5py.h5g.LINK_HARD) elif mode.lower() == 'simplest': @@ -607,17 +621,17 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' hdf = h5py.File(filename, 'a') if compression: hdf.require_dataset('data', - shape=shape, - dtype=dtype, - data=data, - chunks=(1, shape[1], shape[2]), - compression=compression) + shape=shape, + dtype=dtype, + data=data, + chunks=(1, shape[1], shape[2]), + compression=compression) else: hdf.require_dataset('data', - shape=shape, - data=data, - dtype=dtype, - compression=None) + shape=shape, + data=data, + dtype=dtype, + compression=None) else: if os.path.exists(filename): try: @@ -629,13 +643,14 @@ def save3DArrayAsHDF5(data, filename, axes=None, labels=None, dtype=None, mode=' hdf = h5py.File(filename, 'a') dataGroup = hdf.require_group('data') dataGroup.require_dataset('data', - shape=shape, - dtype=dtype, - data=data, - chunks=(1, shape[1], shape[2])) + shape=shape, + dtype=dtype, + data=data, + chunks=(1, shape[1], shape[2])) hdf.flush() hdf.close() + def main(): a = numpy.arange(1000000.) a.shape = 20, 50, 1000 @@ -643,6 +658,7 @@ def main(): getHDF5FileInstanceAndBuffer('/test2.h5', (100, 100, 100)) print("Date String = ", getDate()) + if __name__ == "__main__": main() |