diff options
author | Onderwaater <onderwaa@esrf.fr> | 2015-12-07 16:47:58 +0100 |
---|---|---|
committer | Onderwaater <onderwaa@esrf.fr> | 2015-12-07 16:47:58 +0100 |
commit | c4a7fb18fd630828640c31c3049c5dd574095125 (patch) | |
tree | 767bf97f49fb970a8e74a1c2ea465904089c3ad7 | |
parent | 238bf3d8c389b28313009fd37729cfa13bd4abb3 (diff) |
introducing the multiverse, splitting the scan in multiple spaces
-rw-r--r-- | binoculars/__init__.py | 13 | ||||
-rw-r--r-- | binoculars/backend.py | 5 | ||||
-rwxr-xr-x | binoculars/dispatcher.py | 65 | ||||
-rwxr-xr-x | binoculars/main.py | 33 | ||||
-rwxr-xr-x | binoculars/space.py | 101 | ||||
-rwxr-xr-x | binoculars/util.py | 32 |
6 files changed, 182 insertions, 67 deletions
diff --git a/binoculars/__init__.py b/binoculars/__init__.py index e7824cf..b57c147 100644 --- a/binoculars/__init__.py +++ b/binoculars/__init__.py @@ -10,11 +10,11 @@ def run(args): All additonal configuration file overides can be included Returns - A binoculars space + A tuple of binoculars spaces Examples: >>> space = binoculars.run('config.txt 10') - >>> space + >>> space[0] Axes (3 dimensions, 2848 points, 33.0 kB) { Axis qx (min=-0.01, max=0.0, res=0.01, count=2) Axis qy (min=-0.04, max=-0.01, res=0.01, count=4) @@ -25,11 +25,12 @@ def run(args): import binoculars.main binoculars.util.register_python_executable(__file__) main = binoculars.main.Main.from_args(args.split(' ')) - if isinstance(main.result, binoculars.space.Space): - return main.result + + if isinstance(main.result, binoculars.space.Multiverse): + return main.result.spaces if type(main.result) == bool: - filename = main.dispatcher.config.destination.final_filename() - return binoculars.space.Space.fromfile(filename) + filenames = main.dispatcher.config.destination.final_filenames() + return tuple(binoculars.space.Space.fromfile(fn) for fn in filenames) def load(filename, key = None): ''' Parameters diff --git a/binoculars/backend.py b/binoculars/backend.py index 68eeafb..bf16bfd 100644 --- a/binoculars/backend.py +++ b/binoculars/backend.py @@ -8,8 +8,9 @@ class ProjectionBase(util.ConfigurableObject): self.config.limits = util.parse_pairs(config.pop('limits', None))#Optional, set the limits of the space object in projected coordinates. Syntax is same as numpy e.g. '0.3:-0.6, -1:5, :' labels = self.get_axis_labels() if not self.config.limits is None: - if len(self.config.limits) != len(labels): - raise errors.ConfigError('dimension mismatch between projection axes ({0}) and limits specification ({1}) in {2}'.format(labels, self.config.limits, self.__class__.__name__)) + for lim in self.config.limits: + if len(lim) != len(labels): + raise errors.ConfigError('dimension mismatch between projection axes ({0}) and limits specification ({1}) in {2}'.format(labels, self.config.limits, self.__class__.__name__)) if ',' in res: self.config.resolution = util.parse_tuple(res, type=float) if not len(labels) == len(self.config.resolution): diff --git a/binoculars/dispatcher.py b/binoculars/dispatcher.py index 3b6a409..b491cd5 100755 --- a/binoculars/dispatcher.py +++ b/binoculars/dispatcher.py @@ -8,7 +8,7 @@ from . import util, errors, space class Destination(object): - type = filename = overwrite = value = config = None + type = filename = overwrite = value = config = limits = None opts = {} def set_final_filename(self, filename, overwrite): @@ -20,6 +20,9 @@ class Destination(object): if opts is not False: self.opts = opts + def set_limits(self, limits): + self.limits = limits + def set_config(self, conf): self.config = conf @@ -30,26 +33,38 @@ class Destination(object): def set_memory(self): self.type = 'memory' - def store(self, space): + def store(self, verse): self.value = None + if verse.dimension == 0: + raise ValueError('Empty output, Multiverse contains no spaces') if self.type == 'memory': - self.value = space + self.value = verse elif self.type == 'tmp': - space.tofile(self.filename) + verse.tofile(self.filename) elif self.type == 'final': - fn = self.final_filename() - space.config = self.config - space.tofile(fn) + for sp, fn in zip(verse.spaces, self.final_filenames()): + sp.config = self.config + sp.tofile(fn) def retrieve(self): if self.type == 'memory': return self.value - def final_filename(self): - fn = self.filename.format(**self.opts) - if not self.overwrite: - fn = util.find_unused_filename(fn) - return fn + def final_filenames(self): + fns = [] + if not self.limits == None: + base, ext = os.path.splitext(self.filename) + for limlabel in util.limit_to_filelabel(self.limits): + fn = (base + '_' + limlabel + ext).format(**self.opts) + if not self.overwrite: + fn = util.find_unused_filename(fn) + fns.append(fn) + else: + fn = self.filename.format(**self.opts) + if not self.overwrite: + fn = util.find_unused_filename(fn) + fns.append(fn) + return fns class DispatcherBase(util.ConfigurableObject): def __init__(self, config, main): @@ -66,15 +81,21 @@ class DispatcherBase(util.ConfigurableObject): self.config.port = config.pop('port', None)# port of the running gui awaiting the spaces self.config.send_to_gui = util.parse_bool(config.pop('send_to_gui', 'false'))#previewing the data, if true, also specify host and port - def send(self, spaces):#provides the possiblity to send the results to the gui over the network + def send(self, verses):#provides the possiblity to send the results to the gui over the network if self.config.send_to_gui or (self.config.host is not None and self.config.host is not None):#only continue of ip is specified and send_to_server is flagged - for sp in spaces: - if isinstance(sp, space.Space): - util.socket_send(self.config.host, int(self.config.port), util.serialize(sp, ','.join(self.main.config.command))) - yield sp + for M in verses: + if self.config.destination.limits is None: + sp = M.spaces[0] + if isinstance(sp, space.Space): + util.socket_send(self.config.host, int(self.config.port), util.serialize(sp, ','.join(self.main.config.command))) + else: + for sp, label in zip(M.spaces, util.limit_to_filelabel(self.config.destination.limits)): + if isinstance(sp, space.Space): + util.socket_send(self.config.host, int(self.config.port), util.serialize(sp, '{0}_{1}'.format(','.join(self.main.config.command), label))) + yield M else: - for sp in spaces: - yield sp + for M in verses: + yield M def has_specific_task(self): return False @@ -210,11 +231,11 @@ class Oar(ReentrantBase): if self.config.action != 'process' or (not self.config.jobs and not self.config.sum) or command: raise errors.SubprocessError("invalid command, too many parameters or no jobs/sum given") - jobs = sum = space.EmptySpace() + jobs = sum = space.EmptyVerse() if self.config.jobs: - jobs = space.sum(self.send(self.main.process_job(job) for job in self.config.jobs)) + jobs = space.verse_sum(self.send(self.main.process_job(job) for job in self.config.jobs)) if self.config.sum: - sum = space.chunked_sum(space.Space.fromfile(src) for src in util.yield_when_exists(self.config.sum)) + sum = space.chunked_sum(space.Multiverse.fromfile(src) for src in util.yield_when_exists(self.config.sum)) self.config.destination.store(jobs + sum) ### calling OAR diff --git a/binoculars/main.py b/binoculars/main.py index bac4b7e..25af336 100755 --- a/binoculars/main.py +++ b/binoculars/main.py @@ -42,10 +42,12 @@ class Main(object): self.input = backend.get_input(config.input) self.dispatcher.config.destination.set_final_options(self.input.get_destination_options(command)) + if 'limits' in self.config.projection: + self.dispatcher.config.destination.set_limits(self.config.projection['limits']) if command: self.dispatcher.config.destination.set_config(spaceconf) self.run(command) - + @classmethod def from_args(cls, args): args = parse_args(args) @@ -86,13 +88,17 @@ class Main(object): def generator(): res = self.projection.config.resolution labels = self.projection.get_axis_labels() - for intensity, weights, params in self.input.process_job(job): + for intensity, weights, params in self.input.process_job(job): coords = self.projection.project(*params) - yield space.Space.from_image(res, labels, coords, intensity, weights, limits = self.projection.config.limits) - jobspace = space.chunked_sum(generator(), chunksize=25) - if isinstance(jobspace, space.Space): - jobspace.metadata.add_dataset(self.input.metadata) - return jobspace + if self.projection.config.limits == None: + yield space.Multiverse((space.Space.from_image(res, labels, coords, intensity, weights = weights), )) + else: + yield space.Multiverse(space.Space.from_image(res, labels, coords, intensity, weights = weights, limits = limits) for limits in self.projection.config.limits) + jobverse = space.chunked_sum(generator(), chunksize=25) + for sp in jobverse.spaces: + if isinstance(sp, space.Space): + sp.metadata.add_dataset(self.input.metadata) + return jobverse def clone_config(self): config = util.ConfigSectionGroup() @@ -122,14 +128,17 @@ class Split(Main): #completely ignores the dispatcher, just yields a space per i def process_job(self, job): res = self.projection.config.resolution labels = self.projection.get_axis_labels() - for intensity, weights, params in self.input.process_job(job): + for intensity, weights, params in self.input.process_job(job): coords = self.projection.project(*params) - yield space.Space.from_image(res, labels, coords, intensity, weights, limits = self.projection.config.limits) - + if self.projection.config.limits == None: + yield space.Multiverse(space.Space.from_image(res, labels, coords, intensity, weights = weights)) + else: + yield space.Multiverse(space.Space.from_image(res, labels, coords, intensity, weights = weights, limits = limits) for limits in self.projection.config.limits) def run(self): for job in self.input.generate_jobs(self.command): - for space in self.process_job(job): - yield space + for verse in self.process_job(job): + yield verse + diff --git a/binoculars/space.py b/binoculars/space.py index a52a1cf..0f29e24 100755 --- a/binoculars/space.py +++ b/binoculars/space.py @@ -1,6 +1,5 @@ import itertools import numbers -import __builtin__ import numpy import h5py @@ -321,6 +320,7 @@ class EmptySpace(object): return '{0.__class__.__name__}'.format(self) + class Space(object): """Main data-storing object in BINoculars. Data is represented on an n-dimensional rectangular grid. Per grid point, @@ -344,7 +344,7 @@ class Space(object): self.photons = numpy.zeros([len(ax) for ax in self.axes], order='C') self.contributions = numpy.zeros(self.photons.shape, order='C') - + @property def dimension(self): return self.axes.dimension @@ -375,7 +375,7 @@ class Space(object): @property def metadata(self): - """util.ConfigFile instance describing configuration file used to create this Space instance""" + """util.MetaData instance describing metadata used to create this Space instance""" return self._metadata @metadata.setter @@ -438,7 +438,6 @@ class Space(object): newspace = self.__class__(newaxes, self.config, self.metadata) newspace.photons = self.photons.sum(axis=index) newspace.contributions = self.contributions.sum(axis=index) - if more_axes: return newspace.project(more_axes[0], *more_axes[1:]) else: @@ -634,6 +633,7 @@ class Space(object): """Store Space in HDF5 file.""" with util.atomic_write(filename) as tmpname: with util.open_h5py(tmpname, 'w') as fp: + fp.attrs['type'] = 'Space' self.config.tofile(fp) self.axes.tofile(fp) self.metadata.tofile(fp) @@ -673,6 +673,78 @@ class Space(object): raise errors.HDF5FileError("unable to open '{0}' as HDF5 file (original error: {1!r})".format(file, e)) return space +class Multiverse(object): + """A collection of spaces with basic support for addition. + Only to be used when processing data. This makes it possible to + process multiple limit sets in a combination of scans""" + + def __init__(self, spaces): + self.spaces = list(spaces) + + @property + def dimension(self): + return len(self.spaces) + + def __add__(self, other): + if not isinstance(other, Multiverse): + return NotImplemented + if not self.dimension == other.dimension: + raise ValueError('cannot add multiverses with different dimensionality') + return self.__class__(tuple(s + o for s,o in zip(self.spaces, other.spaces))) + + def __iadd__(self, other): + if not isinstance(other, Multiverse): + return NotImplemented + if not self.dimension == other.dimension: + raise ValueError('cannot add multiverses with different dimensionality') + for index, o in enumerate(other.spaces): + self.spaces[index] += o + return self + + def tofile(self, filename): + with util.atomic_write(filename) as tmpname: + with util.open_h5py(tmpname, 'w') as fp: + fp.attrs['type'] = 'Multiverse' + for index, sp in enumerate(self.spaces): + spacegroup = fp.create_group('space_{0}'.format(index)) + sp.tofile(spacegroup) + + @classmethod + def fromfile(cls, file): + """Load Multiverse from HDF5 file.""" + try: + with util.open_h5py(file, 'r') as fp: + if 'type' in fp.attrs: + if fp.attrs['type'] == 'Multiverse': + return cls(tuple(Space.fromfile(fp[label]) for label in fp)) + else: + raise TypeError('This is not a multiverse') + else: + raise TypeError('This is not a multiverse') + except IOError as e: + raise errors.HDF5FileError("unable to open '{0}' as HDF5 file (original error: {1!r})".format(file, e)) + + def __repr__(self): + return '{0.__class__.__name__}\n{1}'.format(self, self.spaces) + +class EmptyVerse(object): + """Convenience object for sum() and friends. Treated as zero for addition.""" + + def __add__(self, other): + if not isinstance(other, Multiverse): + return NotImplemented + return other + + def __radd__(self, other): + if not isinstance(other, Multiverse): + return NotImplemented + return other + + def __iadd__(self, other): + if not isinstance(other, Multiverse): + return NotImplemented + return other + def union_axes(axes): axes = tuple(axes) if len(axes) == 1: @@ -717,15 +789,19 @@ def sum(spaces): newspace += space return newspace +def verse_sum(verses): + i = iter(M.spaces for M in verses) + return Multiverse(sum(spaces) for spaces in itertools.izip(*i)) + # hybrid sum() / __iadd__() -def chunked_sum(spaces, chunksize=10): - """Calculate sum of iterable of Space instances. Creates intermediate sums to avoid growing a large space at every summation. - - spaces iterable of Space instances - chunksize number of Space instances in each intermediate sum""" - result = EmptySpace() - for chunk in util.grouper(spaces, chunksize): - result += sum(space for space in chunk) +def chunked_sum(verses, chunksize=10): + """Calculate sum of iterable of Multiverse instances. Creates intermediate sums to avoid growing a large space at every summation. + + verses iterable of Multiverse instances + chunksize number of Multiverse instances in each intermediate sum""" + result = EmptyVerse() + for chunk in util.grouper(verses, chunksize): + result += verse_sum(M for M in chunk) return result def iterate_over_axis(space, axis, resolution = None): @@ -811,3 +887,4 @@ def make_compatible(spaces): return tuple(space.reorder(ax0).rebin2(resmax) for space in spaces) + diff --git a/binoculars/util.py b/binoculars/util.py index 3dada8c..940089b 100755 --- a/binoculars/util.py +++ b/binoculars/util.py @@ -20,6 +20,7 @@ import json import socket import StringIO import binascii +import re ### ARGUMENT HANDLING @@ -306,23 +307,28 @@ def parse_bool(s): def parse_pairs(s): if not s: return s - pairs = s.split(',') limits = [] - for pair in pairs: - mi, ma = tuple(m.strip() for m in pair.split(':')) - if mi == '' and ma == '': - limits.append(slice(None)) - elif mi == '': - limits.append(slice(None, float(ma))) - elif ma == '': - limits.append(slice(float(mi), None)) - else: - if float(ma) < float(mi): - raise ValueError("invalid input. maximum is larger than minimum: '{0}'".format(s)) + for lim in re.findall('\[(.*?)\]', s): + parsed = [] + for pair in re.split(',', lim): + mi, ma = tuple(m.strip() for m in pair.split(':')) + if mi == '' and ma == '': + parsed.append(slice(None)) + elif mi == '': + parsed.append(slice(None, float(ma))) + elif ma == '': + parsed.append(slice(float(mi), None)) else: - limits.append(slice(float(mi), float(ma))) + if float(ma) < float(mi): + raise ValueError("invalid input. maximum is larger than minimum: '{0}'".format(s)) + else: + parsed.append(slice(float(mi), float(ma))) + limits.append(parsed) return limits +def limit_to_filelabel(s): + return tuple('[{0}]'.format(lim.replace('-', 'm').replace(':', '-').replace(' ','')) for lim in re.findall('\[(.*?)\]', s)) + class MetaBase(object): def __init__(self, label = None, section = None): self.sections = [] |