summary refs log tree commit diff
diff options
context:
space:
mode:
author Onderwaater <onderwaa@esrf.fr> 2015-10-05 13:35:09 +0200
committer Onderwaater <onderwaa@esrf.fr> 2015-10-05 13:35:09 +0200
commit b7f2706d9e618ae66c52ca15eeda78bce3abdd4a (patch)
tree b14529d4cfae78ad4b82640c08b13147f2e3c84e
parent 960550f3ccfaa724313c586d0f829fbad31c0488 (diff)
parent a61709d8d9da38f34dab165213d3e07e30d5ef71 (diff)
Merge remote-tracking branch 'origin/master'
Conflicts: BINoculars/backends/id03.py BINoculars/util.py
-rw-r--r-- BINoculars/backend.py 3
-rw-r--r-- BINoculars/backends/id03.py 8
-rwxr-xr-x BINoculars/dispatcher.py 4
-rwxr-xr-x BINoculars/main.py 12
-rw-r--r-- BINoculars/space.py 30
-rwxr-xr-x BINoculars/util.py 171
-rw-r--r-- test/cfg.py 2
-rw-r--r-- test/metadata.py 51
8 files changed, 255 insertions, 26 deletions
diff --git a/BINoculars/backend.py b/BINoculars/backend.py
index dcbe5a9..2b5e0cf 100644
--- a/BINoculars/backend.py
+++ b/BINoculars/backend.py
@@ -33,6 +33,7 @@ class InputBase(util.ConfigurableObject):
Note: there is no guarantee that generate_jobs() and process_jobs() will
be called on the same instance, not even in the same process or on the
same computer!"""
+
def parse_config(self, config):
super(InputBase, self).parse_config(config)
self.config.target_weight = int(config.pop('target_weight', 0))## approximate number of images per job, only useful when running on the oar cluster
@@ -45,7 +46,7 @@ class InputBase(util.ConfigurableObject):
"""Receives a Job() instance, yields (intensity, args_to_be_sent_to_a_Projection_instance)
Job()s could have been pickle'd and distributed over a cluster"""
- raise NotImplementedError
+ self.metadata = util.MetaBase('job', job.__dict__)
def get_destination_options(self, command):
"""Receives the same command as generate_jobs(), but returns
diff --git a/BINoculars/backends/id03.py b/BINoculars/backends/id03.py
index 4cfdc99..c2ec41e 100644
--- a/BINoculars/backends/id03.py
+++ b/BINoculars/backends/id03.py
@@ -271,7 +271,9 @@ class ID03Input(backend.InputBase):
yield backend.Job(scan=scanno, firstpoint=0, lastpoint=pointcount-1, weight=pointcount)
def process_job(self, job):
- scan = self.get_scan(job.scan)
+ super(ID03Input, self).process_job(job)
+ scan = self.get_scan(job.scan)
+ self.metadict = dict()
try:
scanparams = self.get_scan_params(scan) # wavelength, UB
pointparams = self.get_point_params(scan, job.firstpoint, job.lastpoint) # 2D array of diffractometer angles + mon + transm
@@ -283,6 +285,7 @@ class ID03Input(backend.InputBase):
except Exception as exc:
exc.args = errors.addmessage(exc.args, ', An error occured for scan {0} at point {1}. See above for more information'.format(self.dbg_scanno, self.dbg_pointno))
raise
+ self.metadata.add_section('id03_backend', self.metadict)
def parse_config(self, config):
super(ID03Input, self).parse_config(config)
@@ -402,6 +405,9 @@ class ID03Input(backend.InputBase):
UB = numpy.array(scan.header('G')[2].split(' ')[-9:],dtype=numpy.float)
wavelength = float(scan.header('G')[1].split(' ')[-1])
+ self.metadict['UB'] = UB
+ self.metadict['wavelength'] = wavelength
+
return wavelength, UB
diff --git a/BINoculars/dispatcher.py b/BINoculars/dispatcher.py
index 5cdbc84..ec721e1 100755
--- a/BINoculars/dispatcher.py
+++ b/BINoculars/dispatcher.py
@@ -20,7 +20,8 @@ class Destination(object):
if opts is not False:
self.opts = opts
- def set_config(self, conf):
+ def set_config(self, conf, meta):
+ self.meta = meta
self.config = conf
def set_tmp_filename(self, filename):
@@ -39,6 +40,7 @@ class Destination(object):
elif self.type == 'final':
fn = self.final_filename()
space.config = self.config
+ space.metadata += self.meta
space.tofile(fn)
def retrieve(self):
diff --git a/BINoculars/main.py b/BINoculars/main.py
index afbcb64..77d4d5c 100755
--- a/BINoculars/main.py
+++ b/BINoculars/main.py
@@ -33,7 +33,12 @@ class Main(object):
else:
raise ValueError('Configfile is the wrong type')
+ # distribute the configfile to space and to the metadata instance
spaceconf = self.config.copy()
+ metadataconfig = self.config.copy()
+ metadataconfig.add_section('command', {'command' : command})
+ metadata = util.MetaData()
+ metadata.add_dataset(metadataconfig)
#input from either the configfile or the configsectiongroup is valid
self.dispatcher = backend.get_dispatcher(config.dispatcher, self, default='local')
@@ -41,7 +46,8 @@ class Main(object):
self.input = backend.get_input(config.input)
self.dispatcher.config.destination.set_final_options(self.input.get_destination_options(command))
- self.dispatcher.config.destination.set_config(spaceconf)
+ if command:
+ self.dispatcher.config.destination.set_config(spaceconf, metadata)
self.run(command)
@classmethod
@@ -88,7 +94,9 @@ class Main(object):
for intensity, params in self.input.process_job(job):
coords = self.projection.project(*params)
yield space.Space.from_image(res, labels, coords, intensity)
- return space.chunked_sum(generator(), chunksize=25)
+ jobspace = space.chunked_sum(generator(), chunksize=25)
+ jobspace.metadata.add_dataset(self.input.metadata)
+ return jobspace
def clone_config(self):
config = util.ConfigSectionGroup()
diff --git a/BINoculars/space.py b/BINoculars/space.py
index 590a463..9deb639 100644
--- a/BINoculars/space.py
+++ b/BINoculars/space.py
@@ -313,13 +313,14 @@ class Space(object):
contributions n-dimensional numpy integer array, number of original datapoints (pixels) per grid point
dimension n"""
- def __init__(self, axes, config=None):
+ def __init__(self, axes, config=None, metadata=None):
if not isinstance(axes, Axes):
self.axes = Axes(axes)
else:
self.axes = axes
self.config = config
+ self.metadata = metadata
self.photons = numpy.zeros([len(ax) for ax in self.axes], order='C')
self.contributions = numpy.zeros(self.photons.shape, dtype=numpy.uint32, order='C')
@@ -352,9 +353,23 @@ class Space(object):
else:
raise TypeError("'{0!r}' is not a util.ConfigFile".format(space))
+ @property
+ def metadata(self):
+ """util.MetaData instance holding the metadata datasets attached to this Space instance"""
+ return self._metadata
+
+ @metadata.setter
+ def metadata(self, metadata):
+ if isinstance(metadata, util.MetaData):
+ self._metadata = metadata
+ elif not metadata:
+ self._metadata = util.MetaData()
+ else:
+ raise TypeError("'{0!r}' is not a util.MetaData".format(space))
+
def copy(self):
"""Returns a copy of self. Numpy data is not shared, but the Axes object is."""
- new = self.__class__(self.axes, self.config)
+ new = self.__class__(self.axes, self.config, self.metadata)
new.photons[:] = self.photons
new.contributions[:] = self.contributions
return new
@@ -374,7 +389,7 @@ class Space(object):
newaxes = tuple(ax[k] for k, ax in zip(newkey, self.axes) if isinstance(ax[k], Axis))
if not newaxes:
return self.photons[newkey] / self.contributions[newkey]
- newspace = self.__class__(newaxes)
+ newspace = self.__class__(newaxes, self.config, self.metadata)
newspace.photons = self.photons[newkey].copy()
newspace.contributions = self.contributions[newkey].copy()
return newspace
@@ -400,7 +415,7 @@ class Space(object):
index = self.axes.index(axis)
newaxes = list(self.axes)
newaxes.pop(index)
- newspace = self.__class__(newaxes)
+ newspace = self.__class__(newaxes, self.config, self.metadata)
newspace.photons = self.photons.sum(axis=index)
newspace.contributions = self.contributions.sum(axis=index)
@@ -469,6 +484,7 @@ class Space(object):
index = tuple(slice(self_ax.get_index(other_ax.min), self_ax.get_index(other_ax.min) + len(other_ax)) for (self_ax, other_ax) in zip(self.axes, other.axes))
self.photons[index] += other.photons
self.contributions[index] += other.contributions
+ self.metadata += other.metadata
return self
def __sub__(self, other):
@@ -566,7 +582,7 @@ class Space(object):
if not self.dimension == len(labels):
raise ValueError('dimension mismatch')
newindices = list(self.axes.index(label) for label in labels)
- new = self.__class__(tuple(self.axes[index] for index in newindices))
+ new = self.__class__(tuple(self.axes[index] for index in newindices), self.config, self.metadata)
new.photons = numpy.transpose(self.photons, axes = newindices)
new.contributions = numpy.transpose(self.contributions, axes = newindices)
return new
@@ -635,6 +651,7 @@ class Space(object):
with util.open_h5py(tmpname, 'w') as fp:
self.config.tofile(fp)
self.axes.tofile(fp)
+ self.metadata.tofile(fp)
fp.create_dataset('counts', self.photons.shape, dtype=self.photons.dtype, compression='gzip').write_direct(self.photons)
fp.create_dataset('contributions', self.contributions.shape, dtype=self.contributions.dtype, compression='gzip').write_direct(self.contributions)
@@ -648,6 +665,7 @@ class Space(object):
with util.open_h5py(file, 'r') as fp:
axes = Axes.fromfile(fp)
config = util.ConfigFile.fromfile(fp)
+ metadata = util.MetaData.fromfile(fp)
if key:
if len(axes) != len(key):
raise ValueError("dimensionality of 'key' does not match dimensionality of Space in HDF5 file {0}".format(file))
@@ -655,7 +673,7 @@ class Space(object):
axes = tuple(ax[k] for k, ax in zip(key, axes) if isinstance(k, slice))
else:
key = Ellipsis
- space = cls(axes, config)
+ space = cls(axes, config, metadata)
try:
fp['counts'].read_direct(space.photons, key)
fp['contributions'].read_direct(space.contributions, key)
diff --git a/BINoculars/util.py b/BINoculars/util.py
index 7a8e30f..7787310 100755
--- a/BINoculars/util.py
+++ b/BINoculars/util.py
@@ -17,7 +17,8 @@ import errors
import StringIO
import struct
import socket
-import json
+import StringIO
+import binascii
### ARGUMENT HANDLING
@@ -299,11 +300,151 @@ def parse_bool(s):
return False
raise ValueError("invalid input for boolean: '{0}'".format(s))
+
+class MetaBase(object):
+ def __init__(self, label = None, section = None):
+ self.sections = []
+ if label is not None and section is not None:
+ self.sections.append(label)
+ setattr(self, label, section)
+ elif label is not None:
+ self.sections.append(label)
+ setattr(self, label, dict())
+
+ def add_section(self, label, section = None):
+ self.sections.append(label)
+ if section is not None:
+ setattr(self, label, section)
+ else:
+ setattr(self, label, dict())
+
+ def __repr__(self):
+ str = '{0.__class__.__name__}{{\n'.format(self)
+ for section in self.sections:
+ str += ' [{}]\n'.format(section)
+ s = getattr(self, section)
+ for entry in s:
+ str += ' {} = {}\n'.format(entry, s[entry])
+ str += '}\n'
+ return str
+
+ def copy(self):
+ return copy.deepcopy(self)
+
+ def serialize(self):
+ sections = {}
+ for section in self.sections:
+ section_dict = {}
+ attr = getattr(self, section)
+ for key in attr.keys():
+ if isinstance(attr[key], numpy.ndarray):# to be able to include numpy arrays in the serialisation
+ sio = StringIO.StringIO()
+ numpy.save(sio, attr[key])
+ sio.seek(0)
+ section_dict[key] = binascii.b2a_hex(sio.read())#hex encoding is needed to let json work with the string
+ else:
+ section_dict[key] = attr[key]
+ sections[section] = section_dict
+ return json.dumps(sections)
+
+ @classmethod
+ def fromserial(cls, s):
+ obj = cls()
+ data = json.loads(s)
+ for section in data.keys():
+ section_dict = data[section]
+ for key in section_dict.keys():
+ if isinstance(section_dict[key], basestring):#find and replace all the numpy serialised objects
+ if section_dict[key].startswith('934e554d505901004600'):#numpy marker
+ sio = StringIO.StringIO()
+ sio.write(binascii.a2b_hex(section_dict[key]))
+ sio.seek(0)
+ section_dict[key] = numpy.load(sio)
+ obj.add_section(section, data[section])
+ return obj
+
+# a collection of metadata objects
+class MetaData(object):
+ def __init__(self):
+ self.metas = []
+
+ def add_dataset(self, dataset):
+ if not isinstance(dataset, MetaBase) and not isinstance(dataset, ConfigFile):
+ raise ValueError('MetaBase instance expected')
+ else:
+ self.metas.append(dataset)
+
+ def __add__(self, other):
+ new = self.__class__()
+ new += self
+ new += other
+ return new
+
+ def __iadd__(self, other):
+ self.metas.extend(other.metas)
+ return self
+
+ @classmethod
+ def fromfile(cls, filename):
+ if isinstance(filename, basestring):
+ if not os.path.exists(filename):
+ raise IOError('Error importing configuration file. filename {0} does not exist'.format(filename))
+
+ metadataobj = cls()
+ with open_h5py(filename, 'r') as fp:
+ try:
+ metadata = fp['metadata']
+ except KeyError as e:
+ metadata = [] # when metadata is not present, proceed without Error
+ for label in metadata:
+ meta = MetaBase()
+ for section in metadata[label].keys():
+ group = metadata[label][section]
+ setattr(meta, section, dict((key, group[key].value) for key in group))
+ meta.sections.append(section)
+ metadataobj.metas.append(meta)
+ return metadataobj
+
+ def tofile(self, filename):
+ with open_h5py(filename, 'w') as fp:
+ metadata = fp.create_group('metadata')
+ for meta in self.metas:
+ label = find_unused_label('metasection', metadata.keys())
+ metabase = metadata.create_group(label)
+ for section in meta.sections:
+ sectiongroup = metabase.create_group(section)
+ s = getattr(meta, section)
+ for key in s.keys():
+ sectiongroup.create_dataset(key, data = s[key])
+
+
+
+ def __repr__(self):
+ str = '{0.__class__.__name__}{{\n'.format(self)
+ for meta in self.metas:
+ for line in meta.__repr__().split('\n'):
+ str += ' ' + line + '\n'
+ str += '}\n'
+ return str
+
+
+ def serialize(self):
+ data = {}
+ return json.dumps(list(meta.serialize() for meta in self.metas))
+
+ @classmethod
+ def fromserial(cls, s):
+ obj = cls()
+ for item in json.loads(s):
+ obj.metas.append(MetaBase.fromserial(item))
+ return obj
+
#Contains the unparsed config dicts
-class ConfigFile(object):
+class ConfigFile(Metabase):
def __init__(self, origin='n/a', command = []):
self.origin = origin
self.command = command
+ super(ConfigFile, self).__init__()
self.sections = 'dispatcher', 'projection', 'input'
for section in self.sections:
setattr(self, section, dict())
@@ -314,7 +455,7 @@ class ConfigFile(object):
if not os.path.exists(filename):
raise IOError('Error importing configuration file. filename {0} does not exist'.format(filename))
- configobj = cls(filename)
+ configobj = cls(str(filename))
with open_h5py(filename, 'r') as fp:
try:
config = fp['configuration']
@@ -366,20 +507,11 @@ class ConfigFile(object):
fp.write('{} = {}\n'.format(entry, s[entry]))
def __repr__(self):
- str = '{0.__class__.__name__}{{\n'.format(self)
- for section in self.sections:
- str += ' [{}]\n'.format(section)
- s = getattr(self, section)
- for entry in s:
- str += ' {} = {}\n'.format(entry, s[entry])
- str += '}\n'
- str += 'origin = {0}\n'.format(self.origin)
+ str = super(ConfigFile, self).__repr__()
+ str += 'origin = {0}'.format(self.origin)
str += 'command = {0}'.format(','.join(self.command))
return str
- def copy(self):
- return copy.deepcopy(self)
-
#contains one parsed dict, for distribution to dispatcher, input or projection class
class ConfigSection(object):
def __init__(self, **kwargs):
@@ -401,6 +533,8 @@ class ConfigurableObject(object):
def __init__(self, config):
if isinstance(config, ConfigSection):
self.config = config
+ elif not isinstance(config, dict):
+ raise ValueError('expecting dict or Configsection, not: {0}'.format(type(config)))
else:
self.config = ConfigSection()
try:
@@ -441,6 +575,15 @@ def find_unused_filename(filename):
if not os.path.exists(f):
return f
+def label_enumerator(label, start=0):
+ for count in itertools.count(start):
+ yield '{0}_{1}'.format(label,count)
+
+def find_unused_label(label, labellist):
+ for l in label_enumerator(label):
+ if not l in labellist:
+ return l
+
def yield_when_exists(filelist, timeout=None):
"""Wait for files in 'filelist' to appear, for a maximum of 'timeout' seconds,
yielding them in arbitrary order as soon as they appear.
diff --git a/test/cfg.py b/test/cfg.py
index e53eed6..7909237 100644
--- a/test/cfg.py
+++ b/test/cfg.py
@@ -11,7 +11,7 @@ class TestCase(unittest.TestCase):
def test_IO(self):
self.cfg.totxtfile('test.txt')
self.cfg.tofile('test.hdf5')
- BINoculars.util.ConfigFile.fromfile('test.hdf5')
+ print BINoculars.util.ConfigFile.fromfile('test.hdf5')
self.assertRaises(IOError, BINoculars.util.ConfigFile.fromtxtfile, '')
self.assertRaises(IOError, BINoculars.util.ConfigFile.fromfile, '')
diff --git a/test/metadata.py b/test/metadata.py
new file mode 100644
index 0000000..69a79a7
--- /dev/null
+++ b/test/metadata.py
@@ -0,0 +1,51 @@
+import BINoculars.util
+import BINoculars.space
+import os
+import numpy
+
+import unittest
+
+class TestCase(unittest.TestCase):
+ def setUp(self):
+ fn = 'examples/configs/example_config_id03'
+ self.cfg = BINoculars.util.ConfigFile.fromtxtfile(fn)
+
+ def test_IO(self):
+ test = {'string' : 'string', 'numpy.array' : numpy.arange(10), 'list' : range(10), 'tuple' : tuple(range(10))}
+ metasection = BINoculars.util.MetaBase()
+ metasection.add_section('first', test)
+ print metasection
+
+ metadata = BINoculars.util.MetaData()
+ metadata.add_dataset(metasection)
+ metadata.add_dataset(self.cfg)
+
+ metadata.tofile('test.hdf5')
+
+ metadata += BINoculars.util.MetaData.fromfile('test.hdf5')
+
+ axis = tuple(BINoculars.space.Axis(0,10,1,label) for label in ['h', 'k', 'l'])
+ axes = BINoculars.space.Axes(axis)
+ space = BINoculars.space.Space(axes)
+ spacedict = dict(z for z in zip('abcde', range(5)))
+ dataset = BINoculars.util.MetaBase('fromspace', spacedict)
+ space.metadata.add_dataset(dataset)
+
+ space.tofile('test2.hdf5')
+ testspace = BINoculars.space.Space.fromfile('test2.hdf5')
+
+ print (space + testspace).metadata
+
+ print '--------------------------------------------------------'
+ print metadata
+ print metadata.serialize()
+ print BINoculars.util.MetaData.fromserial(metadata.serialize())
+
+ def tearDown(self):
+ os.remove('test.hdf5')
+ os.remove('test2.hdf5')
+
+if __name__ == '__main__':
+ unittest.main()
+
+