summaryrefslogtreecommitdiff
path: root/src/silx/io/fioh5.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/silx/io/fioh5.py')
-rw-r--r--src/silx/io/fioh5.py490
1 files changed, 490 insertions, 0 deletions
diff --git a/src/silx/io/fioh5.py b/src/silx/io/fioh5.py
new file mode 100644
index 0000000..75fe587
--- /dev/null
+++ b/src/silx/io/fioh5.py
@@ -0,0 +1,490 @@
+# coding: utf-8
+# /*##########################################################################
+# Copyright (C) 2021 Timo Fuchs
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""This module provides a h5py-like API to access FioFile data.
+
+API description
++++++++++++++++
+
+Fiofile data structure exposed by this API:
+
+::
+
+ /
+ n.1/
+ title = "…"
+ start_time = "…"
+ instrument/
+ fiofile/
+ comments = "…"
+ parameter = "…"
+ comment = "…"
+ parameter/
+ parameter_name = value
+
+ measurement/
+ colname0 = …
+ colname1 = …
+ …
+
+
+The top level scan number ``n.1`` is determined from the filename as in
+``prefix_n.fio``. (e.g. ``eh1_sixc_00045.fio`` would give ``45.1``)
+If no number is available, will use the filename instead.
+
+``comments`` and ``parameter`` in group ``fiofile`` are the raw headers as they
+appear in the original file, as a string of lines separated by newline
+(``\\n``) characters. ``comment`` are the remaining comments,
+which were not parsed.
+
+
+
+The title is the content of the first comment header line
+(e.g ``"ascan ss1vo -4.55687 -0.556875 40 0.2"``).
+The start_time is parsed from the second comment line.
+
+Datasets are stored in the data format specified in the fio file header.
+
+Scan data (e.g. ``/1.1/measurement/colname0``) is accessed by column,
+the dataset name ``colname0`` being the column label as defined in the
+``Col …`` header line.
+
+If a ``/`` character is present in a column label or in a motor name in the
+original FIO file, it will be substituted with a ``%`` character in the
+corresponding dataset name.
+
+MCA data is not yet supported.
+
+This reader requires a fio file as defined in
+src/sardana/macroserver/recorders/storage.py of the Sardana project
+(https://github.com/sardana-org/sardana).
+
+
+Accessing data
+++++++++++++++
+
+Data and groups are accessed in :mod:`h5py` fashion::
+
+ from silx.io.fioh5 import FioH5
+
+ # Open a FioFile
+ fiofh5 = FioH5("test_00056.fio")
+
+ # using FioH5 as a regular group to access scans
+ scan1group = fiofh5["56.1"]
+ instrument_group = scan1group["instrument"]
+
+ # alternative: full path access
+ measurement_group = fiofh5["/56.1/measurement"]
+
+ # accessing a scan data column by name as a 1D numpy array
+ data_array = measurement_group["Pslit HGap"]
+
+
+:class:`FioH5` files and groups provide a :meth:`keys` method::
+
+ >>> fiofh5.keys()
+ ['96.1', '97.1', '98.1']
+ >>> fiofh5['96.1'].keys()
+ ['title', 'start_time', 'instrument', 'measurement']
+
+They can also be treated as iterators:
+
+.. code-block:: python
+
+ from silx.io import is_dataset
+
+ for scan_group in FioH5("test_00056.fio"):
+ dataset_names = [item.name in scan_group["measurement"] if
+ is_dataset(item)]
+ print("Found data columns in scan " + scan_group.name)
+ print(", ".join(dataset_names))
+
+You can test for existence of data or groups::
+
+ >>> "/1.1/measurement/Pslit HGap" in fiofh5
+ True
+ >>> "positioners" in fiofh5["/2.1/instrument"]
+ True
+ >>> "spam" in fiofh5["1.1"]
+ False
+
+"""
+
+__authors__ = ["T. Fuchs"]
+__license__ = "MIT"
+__date__ = "09/04/2021"
+
+
+import os
+
+import datetime
+import logging
+import io
+
+import h5py
+import numpy
+
+from silx import version as silx_version
+from . import commonh5
+
+from .spech5 import to_h5py_utf8
+
+logger1 = logging.getLogger(__name__)
+
+if h5py.version.version_tuple[0] < 3:
+ text_dtype = h5py.special_dtype(vlen=str) # old API
+else:
+ text_dtype = 'O' # variable-length string (supported as of h5py > 3.0)
+
+ABORTLINENO = 5
+
+dtypeConverter = {'STRING': text_dtype,
+ 'DOUBLE': 'f8',
+ 'FLOAT': 'f4',
+ 'INTEGER': 'i8',
+ 'BOOLEAN': '?'}
+
+
+def is_fiofile(filename):
+ """Test if a file is a FIO file, by checking if three consecutive lines
+ start with *!*. Tests up to ABORTLINENO lines at the start of the file.
+
+ :param str filename: File path
+ :return: *True* if file is a FIO file, *False* if it is not a FIO file
+ :rtype: bool
+ """
+ if not os.path.isfile(filename):
+ return False
+ # test for presence of three ! in first lines
+ with open(filename, "rb") as f:
+ chunk = f.read(2500)
+ count = 0
+ for i, line in enumerate(chunk.split(b"\n")):
+ if line.startswith(b"!"):
+ count += 1
+ if count >= 3:
+ return True
+ else:
+ count = 0
+ if i >= ABORTLINENO:
+ break
+ return False
+
+
+class FioFile(object):
+ """This class opens a FIO file and reads the data.
+
+ """
+
+ def __init__(self, filepath):
+ # parse filename
+ filename = os.path.basename(filepath)
+ fnowithsuffix = filename.split('_')[-1]
+ try:
+ self.scanno = int(fnowithsuffix.split('.')[0])
+ except Exception:
+ self.scanno = None
+ logger1.warning("Cannot parse scan number of file %s", filename)
+
+ with open(filepath, 'r') as fiof:
+
+ prev = 0
+ line_counter = 0
+
+ while(True):
+ line = fiof.readline()
+ if line.startswith('!'): # skip comments
+ prev = fiof.tell()
+ line_counter = 0
+ continue
+ if line.startswith('%c'): # comment section
+ line_counter = 0
+ self.commentsection = ''
+ line = fiof.readline()
+ while(not line.startswith('%')
+ and not line.startswith('!')):
+ self.commentsection += line
+ prev = fiof.tell()
+ line = fiof.readline()
+ if line.startswith('%p'): # parameter section
+ line_counter = 0
+ self.parameterssection = ''
+ line = fiof.readline()
+ while(not line.startswith('%')
+ and not line.startswith('!')):
+ self.parameterssection += line
+ prev = fiof.tell()
+ line = fiof.readline()
+ if line.startswith('%d'): # data type definitions
+ line_counter = 0
+ self.datacols = []
+ self.names = []
+ self.dtypes = []
+ line = fiof.readline()
+ while(line.startswith(' Col')):
+ splitline = line.split()
+ name = splitline[-2]
+ self.names.append(name)
+ dtype = dtypeConverter[splitline[-1]]
+ self.dtypes.append(dtype)
+ self.datacols.append((name, dtype))
+ prev = fiof.tell()
+ line = fiof.readline()
+ fiof.seek(prev)
+ break
+
+ line_counter += 1
+ if line_counter > ABORTLINENO:
+ raise IOError("Invalid fio file: Found no data "
+ "after %s lines" % ABORTLINENO)
+
+ self.data = numpy.loadtxt(fiof,
+ dtype={'names': tuple(self.names),
+ 'formats': tuple(self.dtypes)},
+ comments="!")
+
+ # ToDo: read only last line of file,
+ # which sometimes contains the end of acquisition timestamp.
+
+ self.parameter = {}
+
+ # parse parameter section:
+ try:
+ for line in self.parameterssection.splitlines():
+ param, value = line.split(' = ')
+ self.parameter[param] = value
+ except Exception:
+ logger1.warning("Cannot parse parameter section")
+
+ # parse default sardana comments: username and start time
+ try:
+ acquiMarker = "acquisition started at" # indicates timestamp
+ commentlines = self.commentsection.splitlines()
+ if len(commentlines) >= 2:
+ self.title = commentlines[0]
+ l2 = commentlines[1]
+ acqpos = l2.lower().find(acquiMarker)
+ if acqpos < 0:
+ raise Exception("acquisition str not found")
+
+ self.user = l2[:acqpos][4:].strip()
+ self.start_time = l2[acqpos+len(acquiMarker):].strip()
+ commentlines = commentlines[2:]
+ self.comments = "\n".join(commentlines[2:])
+
+ except Exception:
+ logger1.warning("Cannot parse default comment section")
+ self.comments = self.commentsection
+ self.user = ""
+ self.start_time = ""
+ self.title = ""
+
+
+class FioH5NodeDataset(commonh5.Dataset):
+ """This class inherits :class:`commonh5.Dataset`, to which it adds
+ little extra functionality. The main additional functionality is the
+ proxy behavior that allows to mimic the numpy array stored in this
+ class.
+ """
+
+ def __init__(self, name, data, parent=None, attrs=None):
+ # get proper value types, to inherit from numpy
+ # attributes (dtype, shape, size)
+ if isinstance(data, str):
+ # use unicode (utf-8 when saved to HDF5 output)
+ value = to_h5py_utf8(data)
+ elif isinstance(data, float):
+ # use 32 bits for float scalars
+ value = numpy.float32(data)
+ elif isinstance(data, int):
+ value = numpy.int_(data)
+ else:
+ # Enforce numpy array
+ array = numpy.array(data)
+ data_kind = array.dtype.kind
+
+ if data_kind in ["S", "U"]:
+ value = numpy.asarray(array,
+ dtype=text_dtype)
+ else:
+ value = array # numerical data is already the correct datatype
+ commonh5.Dataset.__init__(self, name, value, parent, attrs)
+
+ def __getattr__(self, item):
+ """Proxy to underlying numpy array methods.
+ """
+ if hasattr(self[()], item):
+ return getattr(self[()], item)
+
+ raise AttributeError("FioH5NodeDataset has no attribute %s" % item)
+
+
+class FioH5(commonh5.File):
+ """This class reads a FIO file and exposes it as a *h5py.File*.
+
+ It inherits :class:`silx.io.commonh5.Group` (via :class:`commonh5.File`),
+ which implements most of its API.
+ """
+
+ def __init__(self, filename, order=1):
+ """
+ :param filename: Path to FioFile in filesystem
+ :type filename: str
+ """
+ if isinstance(filename, io.IOBase):
+ # see https://github.com/silx-kit/silx/issues/858
+ filename = filename.name
+
+ if not is_fiofile(filename):
+ raise IOError("File %s is not a FIO file." % filename)
+
+ try:
+ fiof = FioFile(filename) # reads complete file
+ except Exception as e:
+ raise IOError("FIO file %s cannot be read.") from e
+
+ attrs = {"NX_class": to_h5py_utf8("NXroot"),
+ "file_time": to_h5py_utf8(
+ datetime.datetime.now().isoformat()),
+ "file_name": to_h5py_utf8(filename),
+ "creator": to_h5py_utf8("silx fioh5 %s" % silx_version)}
+ commonh5.File.__init__(self, filename, attrs=attrs)
+
+ if fiof.scanno is not None:
+ scan_key = "%s.%s" % (fiof.scanno, int(order))
+ else:
+ scan_key = os.path.splitext(os.path.basename(filename))[0]
+
+ scan_group = FioScanGroup(scan_key, parent=self, scan=fiof)
+ self.add_node(scan_group)
+
+
+class FioScanGroup(commonh5.Group):
+ def __init__(self, scan_key, parent, scan):
+ """
+
+ :param parent: parent Group
+ :param str scan_key: Scan key (e.g. "1.1")
+ :param scan: FioFile object
+ """
+ if hasattr(scan, 'user'):
+ userattr = to_h5py_utf8(scan.user)
+ else:
+ userattr = to_h5py_utf8('')
+ commonh5.Group.__init__(self, scan_key, parent=parent,
+ attrs={"NX_class": to_h5py_utf8("NXentry"),
+ "user": userattr})
+
+ # 'title', 'start_time' and 'user' are defaults
+ # in Sardana created files:
+ if hasattr(scan, 'title'):
+ title = scan.title
+ else:
+ title = scan_key # use scan number as default title
+ self.add_node(FioH5NodeDataset(name="title",
+ data=to_h5py_utf8(title),
+ parent=self))
+
+ if hasattr(scan, 'start_time'):
+ start_time = scan.start_time
+ self.add_node(FioH5NodeDataset(name="start_time",
+ data=to_h5py_utf8(start_time),
+ parent=self))
+
+ self.add_node(FioH5NodeDataset(name="comments",
+ data=to_h5py_utf8(scan.comments),
+ parent=self))
+
+ self.add_node(FioInstrumentGroup(parent=self, scan=scan))
+ self.add_node(FioMeasurementGroup(parent=self, scan=scan))
+
+
+class FioMeasurementGroup(commonh5.Group):
+ def __init__(self, parent, scan):
+ """
+
+ :param parent: parent Group
+ :param scan: FioFile object
+ """
+ commonh5.Group.__init__(self, name="measurement", parent=parent,
+ attrs={"NX_class": to_h5py_utf8("NXcollection")})
+
+ for label in scan.names:
+ safe_label = label.replace("/", "%")
+ self.add_node(FioH5NodeDataset(name=safe_label,
+ data=scan.data[label],
+ parent=self))
+
+
+class FioInstrumentGroup(commonh5.Group):
+ def __init__(self, parent, scan):
+ """
+
+ :param parent: parent Group
+ :param scan: FioFile object
+ """
+ commonh5.Group.__init__(self, name="instrument", parent=parent,
+ attrs={"NX_class": to_h5py_utf8("NXinstrument")})
+
+ self.add_node(FioParameterGroup(parent=self, scan=scan))
+ self.add_node(FioFileGroup(parent=self, scan=scan))
+ self.add_node(FioH5NodeDataset(name="comment",
+ data=to_h5py_utf8(scan.comments),
+ parent=self))
+
+
+class FioFileGroup(commonh5.Group):
+ def __init__(self, parent, scan):
+ """
+
+ :param parent: parent Group
+ :param scan: FioFile object
+ """
+ commonh5.Group.__init__(self, name="fiofile", parent=parent,
+ attrs={"NX_class": to_h5py_utf8("NXcollection")})
+
+ self.add_node(FioH5NodeDataset(name="comments",
+ data=to_h5py_utf8(scan.commentsection),
+ parent=self))
+
+ self.add_node(FioH5NodeDataset(name="parameter",
+ data=to_h5py_utf8(scan.parameterssection),
+ parent=self))
+
+
+class FioParameterGroup(commonh5.Group):
+ def __init__(self, parent, scan):
+ """
+
+ :param parent: parent Group
+ :param scan: FioFile object
+ """
+ commonh5.Group.__init__(self, name="parameter", parent=parent,
+ attrs={"NX_class": to_h5py_utf8("NXcollection")})
+
+ for label in scan.parameter:
+ safe_label = label.replace("/", "%")
+ self.add_node(FioH5NodeDataset(name=safe_label,
+ data=to_h5py_utf8(scan.parameter[label]),
+ parent=self))