diff options
author | Onderwaater <onderwaa@esrf.fr> | 2015-10-05 13:35:09 +0200 |
---|---|---|
committer | Onderwaater <onderwaa@esrf.fr> | 2015-10-05 13:35:09 +0200 |
commit | b7f2706d9e618ae66c52ca15eeda78bce3abdd4a (patch) | |
tree | b14529d4cfae78ad4b82640c08b13147f2e3c84e | |
parent | 960550f3ccfaa724313c586d0f829fbad31c0488 (diff) | |
parent | a61709d8d9da38f34dab165213d3e07e30d5ef71 (diff) |
Merge remote-tracking branch 'origin/master'
Conflicts:
BINoculars/backends/id03.py
BINoculars/util.py
-rw-r--r-- | BINoculars/backend.py | 3 | ||||
-rw-r--r-- | BINoculars/backends/id03.py | 8 | ||||
-rwxr-xr-x | BINoculars/dispatcher.py | 4 | ||||
-rwxr-xr-x | BINoculars/main.py | 12 | ||||
-rw-r--r-- | BINoculars/space.py | 30 | ||||
-rwxr-xr-x | BINoculars/util.py | 171 | ||||
-rw-r--r-- | test/cfg.py | 2 | ||||
-rw-r--r-- | test/metadata.py | 51 |
8 files changed, 255 insertions, 26 deletions
diff --git a/BINoculars/backend.py b/BINoculars/backend.py index dcbe5a9..2b5e0cf 100644 --- a/BINoculars/backend.py +++ b/BINoculars/backend.py @@ -33,6 +33,7 @@ class InputBase(util.ConfigurableObject): Note: there is no guarantee that generate_jobs() and process_jobs() will be called on the same instance, not even in the same process or on the same computer!""" + def parse_config(self, config): super(InputBase, self).parse_config(config) self.config.target_weight = int(config.pop('target_weight', 0))## approximate number of images per job, only useful when running on the oar cluster @@ -45,7 +46,7 @@ class InputBase(util.ConfigurableObject): """Receives a Job() instance, yields (intensity, args_to_be_sent_to_a_Projection_instance) Job()s could have been pickle'd and distributed over a cluster""" - raise NotImplementedError + self.metadata = util.MetaBase('job', job.__dict__) def get_destination_options(self, command): """Receives the same command as generate_jobs(), but returns diff --git a/BINoculars/backends/id03.py b/BINoculars/backends/id03.py index 4cfdc99..c2ec41e 100644 --- a/BINoculars/backends/id03.py +++ b/BINoculars/backends/id03.py @@ -271,7 +271,9 @@ class ID03Input(backend.InputBase): yield backend.Job(scan=scanno, firstpoint=0, lastpoint=pointcount-1, weight=pointcount) def process_job(self, job): - scan = self.get_scan(job.scan) + super(ID03Input, self).process_job(job) + scan = self.get_scan(job.scan) + self.metadict = dict() try: scanparams = self.get_scan_params(scan) # wavelength, UB pointparams = self.get_point_params(scan, job.firstpoint, job.lastpoint) # 2D array of diffractometer angles + mon + transm @@ -283,6 +285,7 @@ class ID03Input(backend.InputBase): except Exception as exc: exc.args = errors.addmessage(exc.args, ', An error occured for scan {0} at point {1}. See above for more information'.format(self.dbg_scanno, self.dbg_pointno)) raise + self.metadata.add_section('id03_backend', self.metadict) def parse_config(self, config): super(ID03Input, self).parse_config(config) @@ -402,6 +405,9 @@ class ID03Input(backend.InputBase): UB = numpy.array(scan.header('G')[2].split(' ')[-9:],dtype=numpy.float) wavelength = float(scan.header('G')[1].split(' ')[-1]) + self.metadict['UB'] = UB + self.metadict['wavelength'] = wavelength + return wavelength, UB diff --git a/BINoculars/dispatcher.py b/BINoculars/dispatcher.py index 5cdbc84..ec721e1 100755 --- a/BINoculars/dispatcher.py +++ b/BINoculars/dispatcher.py @@ -20,7 +20,8 @@ class Destination(object): if opts is not False: self.opts = opts - def set_config(self, conf): + def set_config(self, conf, meta): + self.meta = meta self.config = conf def set_tmp_filename(self, filename): @@ -39,6 +40,7 @@ class Destination(object): elif self.type == 'final': fn = self.final_filename() space.config = self.config + space.metadata += self.meta space.tofile(fn) def retrieve(self): diff --git a/BINoculars/main.py b/BINoculars/main.py index afbcb64..77d4d5c 100755 --- a/BINoculars/main.py +++ b/BINoculars/main.py @@ -33,7 +33,12 @@ class Main(object): else: raise ValueError('Configfile is the wrong type') + # distribute the configfile to space and to the metadata instance spaceconf = self.config.copy() + metadataconfig = self.config.copy() + metadataconfig.add_section('command', {'command' : command}) + metadata = util.MetaData() + metadata.add_dataset(metadataconfig) #input from either the configfile or the configsectiongroup is valid self.dispatcher = backend.get_dispatcher(config.dispatcher, self, default='local') @@ -41,7 +46,8 @@ class Main(object): self.input = backend.get_input(config.input) self.dispatcher.config.destination.set_final_options(self.input.get_destination_options(command)) - self.dispatcher.config.destination.set_config(spaceconf) + if command: + self.dispatcher.config.destination.set_config(spaceconf, metadata) self.run(command) @classmethod @@ -88,7 +94,9 @@ class Main(object): for intensity, params in self.input.process_job(job): coords = self.projection.project(*params) yield space.Space.from_image(res, labels, coords, intensity) - return space.chunked_sum(generator(), chunksize=25) + jobspace = space.chunked_sum(generator(), chunksize=25) + jobspace.metadata.add_dataset(self.input.metadata) + return jobspace def clone_config(self): config = util.ConfigSectionGroup() diff --git a/BINoculars/space.py b/BINoculars/space.py index 590a463..9deb639 100644 --- a/BINoculars/space.py +++ b/BINoculars/space.py @@ -313,13 +313,14 @@ class Space(object): contribitions n-dimensional numpy integer array, number of original datapoints (pixels) per grid point dimension n""" - def __init__(self, axes, config=None): + def __init__(self, axes, config=None, metadata=None): if not isinstance(axes, Axes): self.axes = Axes(axes) else: self.axes = axes self.config = config + self.metadata = metadata self.photons = numpy.zeros([len(ax) for ax in self.axes], order='C') self.contributions = numpy.zeros(self.photons.shape, dtype=numpy.uint32, order='C') @@ -352,9 +353,23 @@ class Space(object): else: raise TypeError("'{0!r}' is not a util.ConfigFile".format(space)) + @property + def metadata(self): + """util.ConfigFile instance describing configuration file used to create this Space instance""" + return self._metadata + + @metadata.setter + def metadata(self, metadata): + if isinstance(metadata, util.MetaData): + self._metadata = metadata + elif not metadata: + self._metadata = util.MetaData() + else: + raise TypeError("'{0!r}' is not a util.MetaData".format(space)) + def copy(self): """Returns a copy of self. Numpy data is not shared, but the Axes object is.""" - new = self.__class__(self.axes, self.config) + new = self.__class__(self.axes, self.config, self.metadata) new.photons[:] = self.photons new.contributions[:] = self.contributions return new @@ -374,7 +389,7 @@ class Space(object): newaxes = tuple(ax[k] for k, ax in zip(newkey, self.axes) if isinstance(ax[k], Axis)) if not newaxes: return self.photons[newkey] / self.contributions[newkey] - newspace = self.__class__(newaxes) + newspace = self.__class__(newaxes, self.config, self.metadata) newspace.photons = self.photons[newkey].copy() newspace.contributions = self.contributions[newkey].copy() return newspace @@ -400,7 +415,7 @@ class Space(object): index = self.axes.index(axis) newaxes = list(self.axes) newaxes.pop(index) - newspace = self.__class__(newaxes) + newspace = self.__class__(newaxes, self.config, self.metadata) newspace.photons = self.photons.sum(axis=index) newspace.contributions = self.contributions.sum(axis=index) @@ -469,6 +484,7 @@ class Space(object): index = tuple(slice(self_ax.get_index(other_ax.min), self_ax.get_index(other_ax.min) + len(other_ax)) for (self_ax, other_ax) in zip(self.axes, other.axes)) self.photons[index] += other.photons self.contributions[index] += other.contributions + self.metadata += other.metadata return self def __sub__(self, other): @@ -566,7 +582,7 @@ class Space(object): if not self.dimension == len(labels): raise ValueError('dimension mismatch') newindices = list(self.axes.index(label) for label in labels) - new = self.__class__(tuple(self.axes[index] for index in newindices)) + new = self.__class__(tuple(self.axes[index] for index in newindices), self.config, self.metadata) new.photons = numpy.transpose(self.photons, axes = newindices) new.contributions = numpy.transpose(self.contributions, axes = newindices) return new @@ -635,6 +651,7 @@ class Space(object): with util.open_h5py(tmpname, 'w') as fp: self.config.tofile(fp) self.axes.tofile(fp) + self.metadata.tofile(fp) fp.create_dataset('counts', self.photons.shape, dtype=self.photons.dtype, compression='gzip').write_direct(self.photons) fp.create_dataset('contributions', self.contributions.shape, dtype=self.contributions.dtype, compression='gzip').write_direct(self.contributions) @@ -648,6 +665,7 @@ class Space(object): with util.open_h5py(file, 'r') as fp: axes = Axes.fromfile(fp) config = util.ConfigFile.fromfile(fp) + metadata = util.MetaData.fromfile(fp) if key: if len(axes) != len(key): raise ValueError("dimensionality of 'key' does not match dimensionality of Space in HDF5 file {0}".format(file)) @@ -655,7 +673,7 @@ class Space(object): axes = tuple(ax[k] for k, ax in zip(key, axes) if isinstance(k, slice)) else: key = Ellipsis - space = cls(axes, config) + space = cls(axes, config, metadata) try: fp['counts'].read_direct(space.photons, key) fp['contributions'].read_direct(space.contributions, key) diff --git a/BINoculars/util.py b/BINoculars/util.py index 7a8e30f..7787310 100755 --- a/BINoculars/util.py +++ b/BINoculars/util.py @@ -17,7 +17,8 @@ import errors import StringIO import struct import socket -import json +import StringIO +import binascii ### ARGUMENT HANDLING @@ -299,11 +300,151 @@ def parse_bool(s): return False raise ValueError("invalid input for boolean: '{0}'".format(s)) + +class MetaBase(object): + def __init__(self, label = None, section = None): + self.sections = [] + if label is not None and section is not None: + self.sections.append(label) + setattr(self, label, section) + elif label is not None: + self.sections.append(label) + setattr(self, label, dict()) + + def add_section(self, label, section = None): + self.sections.append(label) + if section is not None: + setattr(self, label, section) + else: + setattr(self, label, dict()) + + def __repr__(self): + str = '{0.__class__.__name__}{{\n'.format(self) + for section in self.sections: + str += ' [{}]\n'.format(section) + s = getattr(self, section) + for entry in s: + str += ' {} = {}\n'.format(entry, s[entry]) + str += '}\n' + return str + + def copy(self): + return copy.deepcopy(self) + + def serialize(self): + sections = {} + for section in self.sections: + section_dict = {} + attr = getattr(self, section) + for key in attr.keys(): + if isinstance(attr[key], numpy.ndarray):# to be able to include numpy arrays in the serialisation + sio = StringIO.StringIO() + numpy.save(sio, attr[key]) + sio.seek(0) + section_dict[key] = binascii.b2a_hex(sio.read())#hex codation is needed to let json work with the string + else: + section_dict[key] = attr[key] + sections[section] = section_dict + return json.dumps(sections) + + @classmethod + def fromserial(cls, s): + obj = cls() + data = json.loads(s) + for section in data.keys(): + section_dict = data[section] + for key in section_dict.keys(): + if isinstance(section_dict[key], basestring):#find and replace all the numpy serialised objects + if section_dict[key].startswith('934e554d505901004600'):#numpy marker + sio = StringIO.StringIO() + sio.write(binascii.a2b_hex(section_dict[key])) + sio.seek(0) + section_dict[key] = numpy.load(sio) + obj.add_section(section, data[section]) + return obj + +# a collection of metadata objects +class MetaData(object): + def __init__(self): + self.metas = [] + + def add_dataset(self, dataset): + if not isinstance(dataset, MetaBase) and not isinstance(dataset, ConfigFile): + raise ValueError('MetaBase instance expected') + else: + self.metas.append(dataset) + + def __add__(self, other): + new = self.__class__() + new += self + new += other + return new + + def __iadd__(self, other): + self.metas.extend(other.metas) + return self + + @classmethod + def fromfile(cls, filename): + if isinstance(filename, basestring): + if not os.path.exists(filename): + raise IOError('Error importing configuration file. filename {0} does not exist'.format(filename)) + + metadataobj = cls() + with open_h5py(filename, 'r') as fp: + try: + metadata = fp['metadata'] + except KeyError as e: + metadata = [] # when metadata is not present, proceed without Error + for label in metadata: + meta = MetaBase() + for section in metadata[label].keys(): + group = metadata[label][section] + setattr(meta, section, dict((key, group[key].value) for key in group)) + meta.sections.append(section) + metadataobj.metas.append(meta) + return metadataobj + + def tofile(self, filename): + with open_h5py(filename, 'w') as fp: + metadata = fp.create_group('metadata') + for meta in self.metas: + label = find_unused_label('metasection', metadata.keys()) + metabase = metadata.create_group(label) + for section in meta.sections: + sectiongroup = metabase.create_group(section) + s = getattr(meta, section) + for key in s.keys(): + sectiongroup.create_dataset(key, data = s[key]) + + + + def __repr__(self): + str = '{0.__class__.__name__}{{\n'.format(self) + for meta in self.metas: + for line in meta.__repr__().split('\n'): + str += ' ' + line + '\n' + str += '}\n' + return str + + + def serialize(self): + data = {} + return json.dumps(list(meta.serialize() for meta in self.metas)) + + @classmethod + def fromserial(cls, s): + obj = cls() + for item in json.loads(s): + obj.metas.append(MetaBase.fromserial(item)) + return obj + #Contains the unparsed config dicts -class ConfigFile(object): +class ConfigFile(Metabase): def __init__(self, origin='n/a', command = []): self.origin = origin self.command = command + super(ConfigFile, self).__init__() self.sections = 'dispatcher', 'projection', 'input' for section in self.sections: setattr(self, section, dict()) @@ -314,7 +455,7 @@ class ConfigFile(object): if not os.path.exists(filename): raise IOError('Error importing configuration file. filename {0} does not exist'.format(filename)) - configobj = cls(filename) + configobj = cls(str(filename)) with open_h5py(filename, 'r') as fp: try: config = fp['configuration'] @@ -366,20 +507,11 @@ class ConfigFile(object): fp.write('{} = {}\n'.format(entry, s[entry])) def __repr__(self): - str = '{0.__class__.__name__}{{\n'.format(self) - for section in self.sections: - str += ' [{}]\n'.format(section) - s = getattr(self, section) - for entry in s: - str += ' {} = {}\n'.format(entry, s[entry]) - str += '}\n' - str += 'origin = {0}\n'.format(self.origin) + str = super(ConfigFile, self).__repr__() + str += 'origin = {0}'.format(self.origin) str += 'command = {0}'.format(','.join(self.command)) return str - def copy(self): - return copy.deepcopy(self) - #contains one parsed dict, for distribution to dispatcher, input or projection class class ConfigSection(object): def __init__(self, **kwargs): @@ -401,6 +533,8 @@ class ConfigurableObject(object): def __init__(self, config): if isinstance(config, ConfigSection): self.config = config + elif not isinstance(config, dict): + raise ValueError('expecting dict or Configsection, not: {0}'.format(type(config))) else: self.config = ConfigSection() try: @@ -441,6 +575,15 @@ def find_unused_filename(filename): if not os.path.exists(f): return f +def label_enumerator(label, start=0): + for count in itertools.count(start): + yield '{0}_{1}'.format(label,count) + +def find_unused_label(label, labellist): + for l in label_enumerator(label): + if not l in labellist: + return l + def yield_when_exists(filelist, timeout=None): """Wait for files in 'filelist' to appear, for a maximum of 'timeout' seconds, yielding them in arbitrary order as soon as they appear. diff --git a/test/cfg.py b/test/cfg.py index e53eed6..7909237 100644 --- a/test/cfg.py +++ b/test/cfg.py @@ -11,7 +11,7 @@ class TestCase(unittest.TestCase): def test_IO(self): self.cfg.totxtfile('test.txt') self.cfg.tofile('test.hdf5') - BINoculars.util.ConfigFile.fromfile('test.hdf5') + print BINoculars.util.ConfigFile.fromfile('test.hdf5') self.assertRaises(IOError, BINoculars.util.ConfigFile.fromtxtfile, '') self.assertRaises(IOError, BINoculars.util.ConfigFile.fromfile, '') diff --git a/test/metadata.py b/test/metadata.py new file mode 100644 index 0000000..69a79a7 --- /dev/null +++ b/test/metadata.py @@ -0,0 +1,51 @@ +import BINoculars.util +import BINoculars.space +import os +import numpy + +import unittest + +class TestCase(unittest.TestCase): + def setUp(self): + fn = 'examples/configs/example_config_id03' + self.cfg = BINoculars.util.ConfigFile.fromtxtfile(fn) + + def test_IO(self): + test = {'string' : 'string', 'numpy.array' : numpy.arange(10), 'list' : range(10), 'tuple' : tuple(range(10))} + metasection = BINoculars.util.MetaBase() + metasection.add_section('first', test) + print metasection + + metadata = BINoculars.util.MetaData() + metadata.add_dataset(metasection) + metadata.add_dataset(self.cfg) + + metadata.tofile('test.hdf5') + + metadata += BINoculars.util.MetaData.fromfile('test.hdf5') + + axis = tuple(BINoculars.space.Axis(0,10,1,label) for label in ['h', 'k', 'l']) + axes = BINoculars.space.Axes(axis) + space = BINoculars.space.Space(axes) + spacedict = dict(z for z in zip('abcde', range(5))) + dataset = BINoculars.util.MetaBase('fromspace', spacedict) + space.metadata.add_dataset(dataset) + + space.tofile('test2.hdf5') + testspace = BINoculars.space.Space.fromfile('test2.hdf5') + + print (space + testspace).metadata + + print '--------------------------------------------------------' + print metadata + print metadata.serialize() + print BINoculars.util.MetaData.fromserial(metadata.serialize()) + + def tearDown(self): + os.remove('test.hdf5') + os.remove('test2.hdf5') + +if __name__ == '__main__': + unittest.main() + + |