summary refs log tree commit diff
diff options
context:
space:
mode:
author Onderwaater <onderwaa@esrf.fr> 2015-09-03 15:02:23 +0200
committer Onderwaater <onderwaa@esrf.fr> 2015-09-03 15:02:23 +0200
commit a184e4625e76b2395ace22fc639a0b5fd6a7bcde (patch)
tree 7be95c125d28d2acf1bac854232daac0084e5388
parent 91d677d63bccc32bbabee5c226995108d8e6bb6d (diff)
Store metadata in space
-rw-r--r-- BINoculars/backend.py 3
-rw-r--r-- BINoculars/backends/id03.py 7
-rw-r--r-- BINoculars/main.py 10
-rw-r--r-- BINoculars/space.py 30
-rw-r--r-- BINoculars/util.py 128
-rw-r--r-- test/cfg.py 2
-rw-r--r-- test/metadata.py 48
7 files changed, 203 insertions, 25 deletions
diff --git a/BINoculars/backend.py b/BINoculars/backend.py
index dcbe5a9..2b5e0cf 100644
--- a/BINoculars/backend.py
+++ b/BINoculars/backend.py
@@ -33,6 +33,7 @@ class InputBase(util.ConfigurableObject):
Note: there is no guarantee that generate_jobs() and process_jobs() will
be called on the same instance, not even in the same process or on the
same computer!"""
+
def parse_config(self, config):
super(InputBase, self).parse_config(config)
self.config.target_weight = int(config.pop('target_weight', 0))## approximate number of images per job, only useful when running on the oar cluster
@@ -45,7 +46,7 @@ class InputBase(util.ConfigurableObject):
"""Receives a Job() instance, yields (intensity, args_to_be_sent_to_a_Projection_instance)
Job()s could have been pickle'd and distributed over a cluster"""
- raise NotImplementedError
+ self.metadata = util.MetaBase('job', job.__dict__)
def get_destination_options(self, command):
"""Receives the same command as generate_jobs(), but returns
diff --git a/BINoculars/backends/id03.py b/BINoculars/backends/id03.py
index 434b81c..e2ecf58 100644
--- a/BINoculars/backends/id03.py
+++ b/BINoculars/backends/id03.py
@@ -247,8 +247,9 @@ class ID03Input(backend.InputBase):
yield backend.Job(scan=scanno, firstpoint=0, lastpoint=pointcount-1, weight=pointcount)
def process_job(self, job):
+ super(ID03Input, self).process_job(job)
scan = self.get_scan(job.scan)
-
+ self.metadict = dict()
try:
scanparams = self.get_scan_params(scan) # wavelength, UB
pointparams = self.get_point_params(scan, job.firstpoint, job.lastpoint) # 2D array of diffractometer angles + mon + transm
@@ -259,6 +260,7 @@ class ID03Input(backend.InputBase):
except Exception as exc:
exc.args = errors.addmessage(exc.args, ', An error occured for scan {0} at point {1}. See above for more information'.format(self.dbg_scanno, self.dbg_pointno))
raise
+ self.metadata.add_section('id03_backend', self.metadict)
def parse_config(self, config):
super(ID03Input, self).parse_config(config)
@@ -342,6 +344,9 @@ class ID03Input(backend.InputBase):
UB = numpy.array(scan.header('G')[2].split(' ')[-9:],dtype=numpy.float)
wavelength = float(scan.header('G')[1].split(' ')[-1])
+ self.metadict['UB'] = UB
+ self.metadict['wavelength'] = wavelength
+
return wavelength, UB
diff --git a/BINoculars/main.py b/BINoculars/main.py
index e778bc1..75f5b7f 100644
--- a/BINoculars/main.py
+++ b/BINoculars/main.py
@@ -33,7 +33,12 @@ class Main(object):
else:
raise ValueError('Configfile is the wrong type')
+ # distribute the configfile to space and to the metadata instance
spaceconf = self.config.copy()
+ metadataconfig = self.config.copy()
+ metadataconfig.add_section('command', {'command' : command})
+ self.metadata = util.MetaData()
+ self.metadata.add_dataset(metadataconfig)
#input from either the configfile or the configsectiongroup is valid
self.dispatcher = backend.get_dispatcher(config.dispatcher, self, default='local')
@@ -77,6 +82,7 @@ class Main(object):
elif isinstance(self.result, space.EmptySpace):
sys.stderr.write('error: output is an empty dataset\n')
else:
+ self.result.metadata += self.metadata
self.dispatcher.config.destination.store(self.result)
@@ -87,7 +93,9 @@ class Main(object):
for intensity, params in self.input.process_job(job):
coords = self.projection.project(*params)
yield space.Space.from_image(res, labels, coords, intensity)
- return space.chunked_sum(generator(), chunksize=25)
+ jobspace = space.chunked_sum(generator(), chunksize=25)
+ jobspace.metadata.add_dataset(self.input.metadata)
+ return jobspace
def clone_config(self):
config = util.ConfigSectionGroup()
diff --git a/BINoculars/space.py b/BINoculars/space.py
index 44fafb5..9626b28 100644
--- a/BINoculars/space.py
+++ b/BINoculars/space.py
@@ -306,13 +306,14 @@ class Space(object):
contribitions n-dimensional numpy integer array, number of original datapoints (pixels) per grid point
dimension n"""
- def __init__(self, axes, config=None):
+ def __init__(self, axes, config=None, metadata=None):
if not isinstance(axes, Axes):
self.axes = Axes(axes)
else:
self.axes = axes
self.config = config
+ self.metadata = metadata
self.photons = numpy.zeros([len(ax) for ax in self.axes], order='C')
self.contributions = numpy.zeros(self.photons.shape, dtype=numpy.uint32, order='C')
@@ -345,9 +346,23 @@ class Space(object):
else:
raise TypeError("'{0!r}' is not a util.ConfigFile".format(space))
+ @property
+ def metadata(self):
+ """util.ConfigFile instance describing configuration file used to create this Space instance"""
+ return self._metadata
+
+ @metadata.setter
+ def metadata(self, metadata):
+ if isinstance(metadata, util.MetaData):
+ self._metadata = metadata
+ elif not metadata:
+ self._metadata = util.MetaData()
+ else:
+ raise TypeError("'{0!r}' is not a util.MetaData".format(space))
+
def copy(self):
"""Returns a copy of self. Numpy data is not shared, but the Axes object is."""
- new = self.__class__(self.axes, self.config)
+ new = self.__class__(self.axes, self.config, self.metadata)
new.photons[:] = self.photons
new.contributions[:] = self.contributions
return new
@@ -367,7 +382,7 @@ class Space(object):
newaxes = tuple(ax[k] for k, ax in zip(newkey, self.axes) if isinstance(ax[k], Axis))
if not newaxes:
return self.photons[newkey] / self.contributions[newkey]
- newspace = self.__class__(newaxes)
+ newspace = self.__class__(newaxes, self.config, self.metadata)
newspace.photons = self.photons[newkey].copy()
newspace.contributions = self.contributions[newkey].copy()
return newspace
@@ -393,7 +408,7 @@ class Space(object):
index = self.axes.index(axis)
newaxes = list(self.axes)
newaxes.pop(index)
- newspace = self.__class__(newaxes)
+ newspace = self.__class__(newaxes, self.config, self.metadata)
newspace.photons = self.photons.sum(axis=index)
newspace.contributions = self.contributions.sum(axis=index)
@@ -462,6 +477,7 @@ class Space(object):
index = tuple(slice(self_ax.get_index(other_ax.min), self_ax.get_index(other_ax.min) + len(other_ax)) for (self_ax, other_ax) in zip(self.axes, other.axes))
self.photons[index] += other.photons
self.contributions[index] += other.contributions
+ self.metadata += other.metadata
return self
def __sub__(self, other):
@@ -559,7 +575,7 @@ class Space(object):
if not self.dimension == len(labels):
raise ValueError('dimension mismatch')
newindices = list(self.axes.index(label) for label in labels)
- new = self.__class__(tuple(self.axes[index] for index in newindices))
+ new = self.__class__(tuple(self.axes[index] for index in newindices), self.config, self.metadata)
new.photons = numpy.transpose(self.photons, axes = newindices)
new.contributions = numpy.transpose(self.contributions, axes = newindices)
return new
@@ -621,6 +637,7 @@ class Space(object):
with util.open_h5py(tmpname, 'w') as fp:
self.config.tofile(fp)
self.axes.tofile(fp)
+ self.metadata.tofile(fp)
fp.create_dataset('counts', self.photons.shape, dtype=self.photons.dtype, compression='gzip').write_direct(self.photons)
fp.create_dataset('contributions', self.contributions.shape, dtype=self.contributions.dtype, compression='gzip').write_direct(self.contributions)
@@ -634,6 +651,7 @@ class Space(object):
with util.open_h5py(file, 'r') as fp:
axes = Axes.fromfile(fp)
config = util.ConfigFile.fromfile(fp)
+ metadata = util.MetaData.fromfile(fp)
if key:
if len(axes) != len(key):
raise ValueError("dimensionality of 'key' does not match dimensionality of Space in HDF5 file {0}".format(file))
@@ -641,7 +659,7 @@ class Space(object):
axes = tuple(ax[k] for k, ax in zip(key, axes) if isinstance(k, slice))
else:
key = Ellipsis
- space = cls(axes, config)
+ space = cls(axes, config, metadata)
try:
fp['counts'].read_direct(space.photons, key)
fp['contributions'].read_direct(space.contributions, key)
diff --git a/BINoculars/util.py b/BINoculars/util.py
index a924920..f6b7def 100644
--- a/BINoculars/util.py
+++ b/BINoculars/util.py
@@ -295,13 +295,109 @@ def parse_bool(s):
return False
raise ValueError("invalid input for boolean: '{0}'".format(s))
+
+class MetaBase(object):
+ def __init__(self, label = None, section = None):
+ self.sections = []
+ if label is not None and section is not None:
+ self.sections.append(label)
+ setattr(self, label, section)
+ elif label is not None:
+ self.sections.append(label)
+ setattr(self, label, dict())
+
+ def add_section(self, label, section = None):
+ self.sections.append(label)
+ if section is not None:
+ setattr(self, label, section)
+ else:
+ setattr(self, label, dict())
+
+ def __repr__(self):
+ str = '{0.__class__.__name__}{{\n'.format(self)
+ for section in self.sections:
+ str += ' [{}]\n'.format(section)
+ s = getattr(self, section)
+ for entry in s:
+ str += ' {} = {}\n'.format(entry, s[entry])
+ str += '}\n'
+ return str
+
+ def copy(self):
+ return copy.deepcopy(self)
+
+
+# a collection of metadata objects
+class MetaData(object):
+ def __init__(self):
+ self.metas = []
+
+ def add_dataset(self, dataset):
+ if not isinstance(dataset, MetaBase) and not isinstance(dataset, ConfigFile):
+ raise ValueError('MetaBase instance expected')
+ else:
+ self.metas.append(dataset)
+
+ def __add__(self, other):
+ new = self.__class__()
+ new += self
+ new += other
+ return new
+
+ def __iadd__(self, other):
+ self.metas.extend(other.metas)
+ return self
+
+ @classmethod
+ def fromfile(cls, filename):
+ if isinstance(filename, basestring):
+ if not os.path.exists(filename):
+ raise IOError('Error importing configuration file. filename {0} does not exist'.format(filename))
+
+ metadataobj = cls()
+ with open_h5py(filename, 'r') as fp:
+ try:
+ metadata = fp['metadata']
+ except KeyError as e:
+ metadata = [] # when metadata is not present, proceed without Error
+ for label in metadata:
+ meta = MetaBase()
+ for section in metadata[label].keys():
+ group = metadata[label][section]
+ setattr(meta, section, dict((key, group[key].value) for key in group))
+ meta.sections.append(section)
+ metadataobj.metas.append(meta)
+ return metadataobj
+
+ def tofile(self, filename):
+ with open_h5py(filename, 'w') as fp:
+ metadata = fp.create_group('metadata')
+ for meta in self.metas:
+ label = find_unused_label('metasection', metadata.keys())
+ metabase = metadata.create_group(label)
+ for section in meta.sections:
+ sectiongroup = metabase.create_group(section)
+ s = getattr(meta, section)
+ for key in s.keys():
+ sectiongroup.create_dataset(key, data = s[key])
+
+ def __repr__(self):
+ str = '{0.__class__.__name__}{{\n'.format(self)
+ for meta in self.metas:
+ for line in meta.__repr__().split('\n'):
+ str += ' ' + line + '\n'
+ str += '}\n'
+ return str
+
#Contains the unparsed config dicts
-class ConfigFile(object):
+class ConfigFile(MetaBase):
def __init__(self, origin='n/a'):
self.origin = origin
- self.sections = 'dispatcher', 'projection', 'input'
- for section in self.sections:
- setattr(self, section, dict())
+ super(ConfigFile, self).__init__()
+
+ sections = 'dispatcher', 'projection', 'input'
+ for section in sections:
+ self.add_section(section)
@classmethod
def fromfile(cls, filename):
@@ -309,7 +405,7 @@ class ConfigFile(object):
if not os.path.exists(filename):
raise IOError('Error importing configuration file. filename {0} does not exist'.format(filename))
- configobj = cls(filename)
+ configobj = cls(str(filename))
with open_h5py(filename, 'r') as fp:
try:
config = fp['configuration']
@@ -358,19 +454,10 @@ class ConfigFile(object):
fp.write('{} = {}\n'.format(entry, s[entry]))
def __repr__(self):
- str = '{0.__class__.__name__}{{\n'.format(self)
- for section in self.sections:
- str += ' [{}]\n'.format(section)
- s = getattr(self, section)
- for entry in s:
- str += ' {} = {}\n'.format(entry, s[entry])
- str += '}\n'
+ str = super(ConfigFile, self).__repr__()
str += 'origin = {0}'.format(self.origin)
return str
- def copy(self):
- return copy.deepcopy(self)
-
#contains one parsed dict, for distribution to dispatcher, input or projection class
class ConfigSection(object):
def __init__(self, **kwargs):
@@ -392,6 +479,8 @@ class ConfigurableObject(object):
def __init__(self, config):
if isinstance(config, ConfigSection):
self.config = config
+ elif not isinstance(config, dict):
+ raise ValueError('expecting dict or Configsection, not: {0}'.format(type(config)))
else:
self.config = ConfigSection()
try:
@@ -432,6 +521,15 @@ def find_unused_filename(filename):
if not os.path.exists(f):
return f
+def label_enumerator(label, start=0):
+ for count in itertools.count(start):
+ yield '{0}_{1}'.format(label,count)
+
+def find_unused_label(label, labellist):
+ for l in label_enumerator(label):
+ if not l in labellist:
+ return l
+
def yield_when_exists(filelist, timeout=None):
"""Wait for files in 'filelist' to appear, for a maximum of 'timeout' seconds,
yielding them in arbitrary order as soon as they appear.
diff --git a/test/cfg.py b/test/cfg.py
index e53eed6..7909237 100644
--- a/test/cfg.py
+++ b/test/cfg.py
@@ -11,7 +11,7 @@ class TestCase(unittest.TestCase):
def test_IO(self):
self.cfg.totxtfile('test.txt')
self.cfg.tofile('test.hdf5')
- BINoculars.util.ConfigFile.fromfile('test.hdf5')
+ print BINoculars.util.ConfigFile.fromfile('test.hdf5')
self.assertRaises(IOError, BINoculars.util.ConfigFile.fromtxtfile, '')
self.assertRaises(IOError, BINoculars.util.ConfigFile.fromfile, '')
diff --git a/test/metadata.py b/test/metadata.py
new file mode 100644
index 0000000..ef6c616
--- /dev/null
+++ b/test/metadata.py
@@ -0,0 +1,48 @@
+import BINoculars.util
+import BINoculars.space
+import os
+import numpy
+
+import unittest
+
+class TestCase(unittest.TestCase):
+ def setUp(self):
+ fn = 'examples/configs/example_config_id03'
+ self.cfg = BINoculars.util.ConfigFile.fromtxtfile(fn)
+
+ def test_IO(self):
+ test = {'string' : 'string', 'numpy.array' : numpy.arange(10), 'list' : range(10), 'tuple' : tuple(range(10))}
+ metasection = BINoculars.util.MetaBase()
+ metasection.add_section('first', test)
+ print metasection
+
+ metadata = BINoculars.util.MetaData()
+ metadata.add_dataset(metasection)
+ metadata.add_dataset(self.cfg)
+
+ metadata.tofile('test.hdf5')
+
+ metadata += BINoculars.util.MetaData.fromfile('test.hdf5')
+
+ axis = tuple(BINoculars.space.Axis(0,10,1,label) for label in ['h', 'k', 'l'])
+ axes = BINoculars.space.Axes(axis)
+ space = BINoculars.space.Space(axes)
+ spacedict = dict(z for z in zip('abcde', range(5)))
+ dataset = BINoculars.util.MetaBase('fromspace', spacedict)
+ space.metadata.add_dataset(dataset)
+
+ space.tofile('test2.hdf5')
+ testspace = BINoculars.space.Space.fromfile('test2.hdf5')
+
+ print (space + testspace).metadata
+
+
+
+ def tearDown(self):
+ os.remove('test.hdf5')
+ os.remove('test2.hdf5')
+
+if __name__ == '__main__':
+ unittest.main()
+
+