summaryrefslogtreecommitdiff
path: root/silx/io/spech5.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/io/spech5.py')
-rw-r--r--silx/io/spech5.py155
1 files changed, 96 insertions, 59 deletions
diff --git a/silx/io/spech5.py b/silx/io/spech5.py
index 81a7a7e..a112fe0 100644
--- a/silx/io/spech5.py
+++ b/silx/io/spech5.py
@@ -1,6 +1,6 @@
# coding: utf-8
# /*##########################################################################
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2018 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -157,20 +157,30 @@ You can test for existence of data or groups::
>>> "spam" in sfh5["1.1"]
False
-Strings are stored encoded as ``numpy.string_``, as recommended by
-`the h5py documentation <http://docs.h5py.org/en/latest/strings.html>`_.
-This ensures maximum compatibility with third party software libraries,
-when saving a :class:`SpecH5` to a HDF5 file using :mod:`silx.io.spectoh5`.
+.. note::
-The type ``numpy.string_`` is a byte-string format. The consequence of this
-is that you should decode strings before using them in **Python 3**::
+ Text used to be stored with a dtype ``numpy.string_`` in silx versions
+ prior to *0.7.0*. The type ``numpy.string_`` is a byte-string format.
+ The consequence of this is that you had to decode strings before using
+ them in **Python 3**::
- >>> from silx.io.spech5 import SpecH5
- >>> sfh5 = SpecH5("31oct98.dat")
- >>> sfh5["/68.1/title"]
- b'68 ascan tx3 -28.5 -24.5 20 0.5'
- >>> sfh5["/68.1/title"].decode()
- '68 ascan tx3 -28.5 -24.5 20 0.5'
+ >>> from silx.io.spech5 import SpecH5
+ >>> sfh5 = SpecH5("31oct98.dat")
+ >>> sfh5["/68.1/title"]
+ b'68 ascan tx3 -28.5 -24.5 20 0.5'
+ >>> sfh5["/68.1/title"].decode()
+ '68 ascan tx3 -28.5 -24.5 20 0.5'
+
+ From silx version *0.7.0* onwards, text is now stored as unicode. This
+ corresponds to the default text type in python 3, and to the *unicode*
+ type in Python 2.
+
+ To be on the safe side, you can test for the presence of a *decode*
+ attribute, to ensure that you always work with unicode text::
+
+ >>> title = sfh5["/68.1/title"]
+ >>> if hasattr(title, "decode"):
+ ... title = title.decode()
"""
@@ -178,28 +188,32 @@ import datetime
import logging
import numpy
import re
-import sys
import io
+import h5py
from silx import version as silx_version
from .specfile import SpecFile
from . import commonh5
+from silx.third_party import six
__authors__ = ["P. Knobel", "D. Naudet"]
__license__ = "MIT"
-__date__ = "23/08/2017"
+__date__ = "01/03/2018"
logger1 = logging.getLogger(__name__)
-try:
- import h5py
-except ImportError:
- h5py = None
- logger1.debug("Module h5py optional.", exc_info=True)
+
+text_dtype = h5py.special_dtype(vlen=six.text_type)
-string_types = (basestring,) if sys.version_info[0] == 2 else (str,) # noqa
-integer_types = (int, long,) if sys.version_info[0] == 2 else (int,) # noqa
+def to_h5py_utf8(str_list):
+ """Convert a string or a list of strings to a numpy array of
+ unicode strings that can be written to HDF5 as utf-8.
+
+ This ensures that the type will be consistent between python 2 and
+ python 3, if attributes or datasets are saved to an HDF5 file.
+ """
+ return numpy.array(str_list, dtype=text_dtype)
def _get_number_of_mca_analysers(scan):
@@ -457,10 +471,9 @@ class SpecH5NodeDataset(commonh5.Dataset, SpecH5Dataset):
def __init__(self, name, data, parent=None, attrs=None):
# get proper value types, to inherit from numpy
# attributes (dtype, shape, size)
- if isinstance(data, string_types):
- # use bytes for maximum compatibility
- # (see http://docs.h5py.org/en/latest/strings.html)
- value = numpy.string_(data)
+ if isinstance(data, six.string_types):
+ # use unicode (utf-8 when saved to HDF5 output)
+ value = to_h5py_utf8(data)
elif isinstance(data, float):
# use 32 bits for float scalars
value = numpy.float32(data)
@@ -472,7 +485,8 @@ class SpecH5NodeDataset(commonh5.Dataset, SpecH5Dataset):
data_kind = array.dtype.kind
if data_kind in ["S", "U"]:
- value = numpy.asarray(array, dtype=numpy.string_)
+ value = numpy.asarray(array,
+ dtype=text_dtype)
elif data_kind in ["f"]:
value = numpy.asarray(array, dtype=numpy.float32)
else:
@@ -547,12 +561,12 @@ class SpecH5(commonh5.File, SpecH5Group):
self._sf = SpecFile(filename)
- attrs = {"NX_class": "NXroot",
- "file_time": datetime.datetime.now().isoformat(),
- "file_name": filename,
- "creator": "silx %s" % silx_version}
+ attrs = {"NX_class": to_h5py_utf8("NXroot"),
+ "file_time": to_h5py_utf8(
+ datetime.datetime.now().isoformat()),
+ "file_name": to_h5py_utf8(filename),
+ "creator": to_h5py_utf8("silx spech5 %s" % silx_version)}
commonh5.File.__init__(self, filename, attrs=attrs)
- assert self.attrs["NX_class"] == "NXroot"
for scan_key in self._sf.keys():
scan = self._sf[scan_key]
@@ -560,7 +574,7 @@ class SpecH5(commonh5.File, SpecH5Group):
self.add_node(scan_group)
def close(self):
- # or del self._sf?
+ self._sf.close()
self._sf = None
@@ -573,10 +587,13 @@ class ScanGroup(commonh5.Group, SpecH5Group):
:param scan: specfile.Scan object
"""
commonh5.Group.__init__(self, scan_key, parent=parent,
- attrs={"NX_class": "NXentry"})
+ attrs={"NX_class": to_h5py_utf8("NXentry")})
+ # take title in #S after stripping away scan number and spaces
+ s_hdr_line = scan.scan_header_dict["S"]
+ title = s_hdr_line.lstrip("0123456789").lstrip()
self.add_node(SpecH5NodeDataset(name="title",
- data=scan.scan_header_dict["S"],
+ data=to_h5py_utf8(title),
parent=self))
if "D" in scan.scan_header_dict:
@@ -603,7 +620,7 @@ class ScanGroup(commonh5.Group, SpecH5Group):
scan_key)
start_time_str = ""
self.add_node(SpecH5NodeDataset(name="start_time",
- data=start_time_str,
+ data=to_h5py_utf8(start_time_str),
parent=self))
self.add_node(InstrumentGroup(parent=self, scan=scan))
@@ -620,7 +637,7 @@ class InstrumentGroup(commonh5.Group, SpecH5Group):
:param scan: specfile.Scan object
"""
commonh5.Group.__init__(self, name="instrument", parent=parent,
- attrs={"NX_class": "NXinstrument"})
+ attrs={"NX_class": to_h5py_utf8("NXinstrument")})
self.add_node(InstrumentSpecfileGroup(parent=self, scan=scan))
self.add_node(PositionersGroup(parent=self, scan=scan))
@@ -635,21 +652,23 @@ class InstrumentGroup(commonh5.Group, SpecH5Group):
class InstrumentSpecfileGroup(commonh5.Group, SpecH5Group):
def __init__(self, parent, scan):
commonh5.Group.__init__(self, name="specfile", parent=parent,
- attrs={"NX_class": "NXcollection"})
- self.add_node(SpecH5NodeDataset(name="file_header",
- data="\n".join(scan.file_header),
- parent=self,
- attrs={}))
- self.add_node(SpecH5NodeDataset(name="scan_header",
- data="\n".join(scan.scan_header),
- parent=self,
- attrs={}))
+ attrs={"NX_class": to_h5py_utf8("NXcollection")})
+ self.add_node(SpecH5NodeDataset(
+ name="file_header",
+ data=to_h5py_utf8(scan.file_header),
+ parent=self,
+ attrs={}))
+ self.add_node(SpecH5NodeDataset(
+ name="scan_header",
+ data=to_h5py_utf8(scan.scan_header),
+ parent=self,
+ attrs={}))
class PositionersGroup(commonh5.Group, SpecH5Group):
def __init__(self, parent, scan):
commonh5.Group.__init__(self, name="positioners", parent=parent,
- attrs={"NX_class": "NXcollection"})
+ attrs={"NX_class": to_h5py_utf8("NXcollection")})
for motor_name in scan.motor_names:
safe_motor_name = motor_name.replace("/", "%")
if motor_name in scan.labels and scan.data.shape[0] > 0:
@@ -668,11 +687,14 @@ class InstrumentMcaGroup(commonh5.Group, SpecH5Group):
def __init__(self, parent, analyser_index, scan):
name = "mca_%d" % analyser_index
commonh5.Group.__init__(self, name=name, parent=parent,
- attrs={"NX_class": "NXdetector"})
+ attrs={"NX_class": to_h5py_utf8("NXdetector")})
- self.add_node(McaDataDataset(parent=self,
+ mcaDataDataset = McaDataDataset(parent=self,
analyser_index=analyser_index,
- scan=scan))
+ scan=scan)
+ self.add_node(mcaDataDataset)
+ spectrum_length = mcaDataDataset.shape[-1]
+ mcaDataDataset = None
if len(scan.mca.channels) == 1:
# single @CALIB line applying to multiple devices
@@ -681,6 +703,21 @@ class InstrumentMcaGroup(commonh5.Group, SpecH5Group):
else:
calibration_dataset = scan.mca.calibration[analyser_index]
channels_dataset = scan.mca.channels[analyser_index]
+
+ channels_length = len(channels_dataset)
+ if (channels_length > 1) and (spectrum_length > 0):
+ logger1.info("Spectrum and channels length mismatch")
+ # this should always be the case
+ if channels_length > spectrum_length:
+ channels_dataset = channels_dataset[:spectrum_length]
+ elif channels_length < spectrum_length:
+ # only trust first channel and increment
+ channel0 = channels_dataset[0]
+ increment = channels_dataset[1] - channels_dataset[0]
+ channels_dataset = numpy.linspace(channel0,
+ channel0 + increment * spectrum_length,
+ spectrum_length, endpoint=False)
+
self.add_node(SpecH5NodeDataset(name="calibration",
data=calibration_dataset,
parent=self))
@@ -707,7 +744,7 @@ class McaDataDataset(SpecH5LazyNodeDataset):
def __init__(self, parent, analyser_index, scan):
commonh5.LazyLoadableDataset.__init__(
self, name="data", parent=parent,
- attrs={"interpretation": "spectrum", })
+ attrs={"interpretation": to_h5py_utf8("spectrum"),})
self._scan = scan
self._analyser_index = analyser_index
self._shape = None
@@ -741,7 +778,7 @@ class McaDataDataset(SpecH5LazyNodeDataset):
def __getitem__(self, item):
# optimization for fetching a single spectrum if data not already loaded
if not self._is_initialized:
- if isinstance(item, integer_types):
+ if isinstance(item, six.integer_types):
if item < 0:
# negative indexing
item += len(self)
@@ -750,7 +787,7 @@ class McaDataDataset(SpecH5LazyNodeDataset):
# accessing a slice or element of a single spectrum [i, j:k]
try:
spectrum_idx, channel_idx_or_slice = item
- assert isinstance(spectrum_idx, integer_types)
+ assert isinstance(spectrum_idx, six.integer_types)
except (ValueError, TypeError, AssertionError):
pass
else:
@@ -770,7 +807,7 @@ class MeasurementGroup(commonh5.Group, SpecH5Group):
:param scan: specfile.Scan object
"""
commonh5.Group.__init__(self, name="measurement", parent=parent,
- attrs={"NX_class": "NXcollection", })
+ attrs={"NX_class": to_h5py_utf8("NXcollection"),})
for label in scan.labels:
safe_label = label.replace("/", "%")
self.add_node(SpecH5NodeDataset(name=safe_label,
@@ -805,23 +842,23 @@ class SampleGroup(commonh5.Group, SpecH5Group):
:param scan: specfile.Scan object
"""
commonh5.Group.__init__(self, name="sample", parent=parent,
- attrs={"NX_class": "NXsample", })
+ attrs={"NX_class": to_h5py_utf8("NXsample"),})
if _unit_cell_in_scan(scan):
self.add_node(SpecH5NodeDataset(name="unit_cell",
data=_parse_unit_cell(scan.scan_header_dict["G1"]),
parent=self,
- attrs={"interpretation": "scalar"}))
+ attrs={"interpretation": to_h5py_utf8("scalar")}))
self.add_node(SpecH5NodeDataset(name="unit_cell_abc",
data=_parse_unit_cell(scan.scan_header_dict["G1"])[0, 0:3],
parent=self,
- attrs={"interpretation": "scalar"}))
+ attrs={"interpretation": to_h5py_utf8("scalar")}))
self.add_node(SpecH5NodeDataset(name="unit_cell_alphabetagamma",
data=_parse_unit_cell(scan.scan_header_dict["G1"])[0, 3:6],
parent=self,
- attrs={"interpretation": "scalar"}))
+ attrs={"interpretation": to_h5py_utf8("scalar")}))
if _ub_matrix_in_scan(scan):
self.add_node(SpecH5NodeDataset(name="ub_matrix",
data=_parse_UB_matrix(scan.scan_header_dict["G3"]),
parent=self,
- attrs={"interpretation": "scalar"}))
+ attrs={"interpretation": to_h5py_utf8("scalar")}))