diff options
author | Erich Schubert <erich@debian.org> | 2012-12-14 20:45:15 +0100 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:35 +0000 |
commit | 357b2761a2c0ded8cad5e4d3c1e667b7639ff7a6 (patch) | |
tree | 3dd8947bb70a67c221adc3cd4359ba1d385e2f3c /src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java | |
parent | 4343785ebed9d4145f417d86d581f18a0d31e4ac (diff) | |
parent | b7b404fd7a726774d442562d11659d7b5368cdb9 (diff) |
Import Debian changes 0.5.5-1
elki (0.5.5-1) unstable; urgency=low
* New upstream release: 0.5.5 interim release.
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java | 248 |
1 files changed, 248 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java new file mode 100644 index 00000000..676a5e8f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java @@ -0,0 +1,248 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; + +/** + * A flexible histogram storing long, that can dynamically adapt the number of + * bins to the data fed into the histogram. + * + * @author Erich Schubert + */ +public class LongDynamicHistogram extends LongStaticHistogram { + /** + * Cache for data to be inserted. + */ + private double[] cachec; + + /** + * Cache for data to be inserted. + */ + private long[] cachev; + + /** + * Cache fill size + */ + private int cachefill; + + /** + * Destination (minimum) size of the structure. + * At most destsize * 2 bins are allowed. + */ + private int destsize; + + /** + * Constructor. + * + * @param bins Design number of bins - may become twice as large! + */ + public LongDynamicHistogram(int bins) { + super(-1, 0.0, 1.0); + this.destsize = bins; + cachec = new double[this.destsize << CACHE_SHIFT]; + cachev = new long[this.destsize << CACHE_SHIFT]; + cachefill = 0; + } + + /** + * Materialize the histogram from the cache. + */ + void materialize() { + // already materialized? + if (cachefill < 0) { + return; + } + // Compute minimum and maximum + double min = Double.MAX_VALUE, max = Double.MIN_VALUE; + for (int i = 0; i < cachefill; i++) { + min = Math.min(min, cachec[i]); + max = Math.max(max, cachec[i]); + } + // use the LinearScale magic to round to "likely suiteable" step sizes. + // TODO: extract into a reusable function? + LinearScale scale = new LinearScale(min, max); + min = scale.getMin(); + max = scale.getMax(); + this.base = min; + this.max = max; + this.binsize = (max - min) / this.destsize; + // initialize array + this.data = new long[this.destsize << 1]; + size = destsize; + // re-insert data we have + final int end = cachefill; + cachefill = -1; // So reinsert works! + for (int i = 0; i < end; i++) { + increment(cachec[i], cachev[i]); + } + // delete cache, signal that we're initialized + cachec = null; + cachev = null; + } + + @Override + public long get(double coord) { + materialize(); + testResample(coord); + return super.get(coord); + } + + /** + * Put fresh data into the histogram (or into the cache) + * + * @param coord Coordinate + * @param value Value + */ + @Override + public void increment(double coord, long value) { + // Store in cache + if (cachefill >= 0) { + if (cachefill < cachec.length) { + cachec[cachefill] = coord; + cachev[cachefill] = value; + cachefill ++; + return; + } else { + materialize(); + // But continue below! + } + } + // Check if we need to resample to accomodate this bin. + testResample(coord); + // super class will handle histogram resizing / shifting + super.increment(coord, value); + } + + /** + * Test (and perform) downsampling when neede. + * + * @param coord coordinate to accomodate. + */ + private void testResample(double coord) { + final int bin = getBinNr(coord); + final int sizereq, off; + if (bin < 0) { + sizereq = size - bin; + off = -bin; + } else if (bin >= data.length) { + sizereq = bin + 1; + off = 0; + } else { + // Within the designated size - nothing to do. + return; + } + if (sizereq < data.length) { + // Accomodate by shifting. Let super do the job in {@link #get} + return; + } + // Resampling, eventually by multiple levels. + final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1; + assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length; + final int step = 1 << levels; + + final int fixpoint = off / (step - 1); + { + // Start positions for in-place bottom-up downsampling. + int oup = fixpoint; + int inp = (oup << levels) - off; + assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp < size; inp += step, oup++) { + assert (oup < inp + step && oup < data.length); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + // Clean upwards + for (; oup < data.length; oup++) { + data[oup] = 0; + } + } + if (off > 0) { + // Start positions for in-place downsampling top-down: + int oup = fixpoint - 1; + int inp = (oup << levels) - off; + assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp > -step; inp -= step, oup--) { + assert (oup >= inp && oup >= 0); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + for (; oup >= 0; oup--) { + data[oup] = 0; + } + } + // recalculate histogram base. + base = base - (offset + off) * binsize; + offset = 0; + size = (size + 1) >> levels; + binsize = binsize * (1 << levels); + max = base + binsize * size; + } + + @Override + public Iter iter() { + materialize(); + return super.iter(); + } + + @Override + public int getNumBins() { + materialize(); + return super.getNumBins(); + } + + @Override + public double getBinsize() { + materialize(); + return super.getBinsize(); + } + + @Override + public double getCoverMinimum() { + materialize(); + return super.getCoverMinimum(); + } + + @Override + public double getCoverMaximum() { + materialize(); + return super.getCoverMaximum(); + } + + /** + * Perform downsampling on a number of bins. + * + * @param data Data array (needs cast!) + * @param start Interval start + * @param end Interval end (exclusive) + * @param size Intended size - extra bins are assumed to be empty, should be a + * power of two + * @return New bin value + */ + protected long downsample(long[] data, int start, int end, int size) { + long sum = 0; + for (int i = start; i < end; i++) { + sum += data[i]; + } + return sum; + } +} |