author     Andrej Shadura <andrewsh@debian.org>  2019-03-09 22:30:41 +0000
committer  Andrej Shadura <andrewsh@debian.org>  2019-03-09 22:30:41 +0000
commit     38212b3127e90751fb39cda34250bc11be62b76c
tree       dc1397346030e9695bd763dddc93b3be527cd643
parent     337087b668d3a54f3afee3a9adb597a32e9f7e94
Import Upstream version 0.7.0
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java')
-rw-r--r--  elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java  250
1 file changed, 250 insertions(+), 0 deletions(-)
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java
new file mode 100644
index 00000000..78385a30
--- /dev/null
+++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java
@@ -0,0 +1,250 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2015
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+
+/**
+ * A flexible histogram storing long counts that can dynamically adapt the
+ * number of bins to the data fed into the histogram.
+ *
+ * @author Erich Schubert
+ */
+public class LongDynamicHistogram extends LongStaticHistogram {
+ /**
+ * Cache for the coordinates of data waiting to be inserted.
+ */
+ private double[] cachec;
+
+ /**
+ * Cache for the values of data waiting to be inserted.
+ */
+ private long[] cachev;
+
+ /**
+ * Cache fill size
+ */
+ private int cachefill;
+
+ /**
+ * Destination (minimum) size of the structure.
+ * At most destsize * 2 bins are allowed.
+ */
+ private int destsize;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Design number of bins - may become twice as large!
+ */
+ public LongDynamicHistogram(int bins) {
+ super(-1, 0.0, 1.0);
+ this.destsize = bins;
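+ // Buffer (1 << CACHE_SHIFT) * bins observations before committing to a bin layout.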
+ cachec = new double[this.destsize << CACHE_SHIFT];
+ cachev = new long[this.destsize << CACHE_SHIFT];
+ cachefill = 0;
+ }
+
+ /**
+ * Materialize the histogram from the cache.
+ */
+ void materialize() {
+ // already materialized?
+ if (cachefill < 0) {
+ return;
+ }
+ // Compute minimum and maximum
+ double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY;
+ for (int i = 0; i < cachefill; i++) {
+ min = Math.min(min, cachec[i]);
+ max = Math.max(max, cachec[i]);
+ }
+ // use the LinearScale magic to round to "likely suitable" step sizes.
+ // TODO: extract into a reusable function?
+ LinearScale scale = new LinearScale(min, max);
+ min = scale.getMin();
+ max = scale.getMax();
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / this.destsize;
+ // Initialize the bin array: twice the target size, leaving room to grow before resampling.
+ this.data = new long[this.destsize << 1];
+ size = destsize;
+ // re-insert data we have
+ final int end = cachefill;
+ cachefill = -1; // So reinsert works!
+ for (int i = 0; i < end; i++) {
+ increment(cachec[i], cachev[i]);
+ }
+ // delete cache, signal that we're initialized
+ cachec = null;
+ cachev = null;
+ }
+
+ @Override
+ public long get(double coord) {
+ materialize();
+ testResample(coord);
+ return super.get(coord);
+ }
+
+ /**
+ * Put fresh data into the histogram (or into the cache)
+ *
+ * @param coord Coordinate
+ * @param value Value
+ */
+ @Override
+ public void increment(double coord, long value) {
+ // Store in cache
+ if (cachefill >= 0) {
+ if (cachefill < cachec.length) {
+ cachec[cachefill] = coord;
+ cachev[cachefill] = value;
+ cachefill++;
+ return;
+ } else {
+ materialize();
+ // But continue below!
+ }
+ }
+ // Check if we need to resample to accommodate this bin.
+ testResample(coord);
+ // super class will handle histogram resizing / shifting
+ super.increment(coord, value);
+ }
+
+ /**
+ * Test for (and perform) downsampling when needed.
+ *
+ * @param coord Coordinate to accommodate.
+ */
+ private void testResample(double coord) {
+ final int bin = getBinNr(coord);
+ final int sizereq, off;
+ if (bin < 0) {
+ sizereq = size - bin;
+ off = -bin;
+ } else if (bin >= data.length) {
+ sizereq = bin + 1;
+ off = 0;
+ } else {
+ // Within the designated size - nothing to do.
+ return;
+ }
+ if (sizereq < data.length) {
+ // Accommodate by shifting. Let super do the job in {@link #get}
+ return;
+ }
+ // Resampling, eventually by multiple levels.
+ final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1;
+ assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length;
+ final int step = 1 << levels;
+
+ // We want to map [i ... i+step[ -> (i+off)/step
+ // Fix point: i = (i+off)/step; i*(step-1)=off; i=off/(step-1)
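+ // Example: step=4, off=6 gives fixpoint=2, and old bins 2..5 collapse into new bin 2.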
+ final int fixpoint = off / (step - 1);
+ {
+ // Start positions for in-place bottom-up downsampling.
+ int oup = (fixpoint >= 0) ? fixpoint : 0;
+ int inp = (oup << levels) - off;
+ assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp < size; inp += step, oup++) {
+ assert (oup < inp + step && oup < data.length);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ // Clean upwards
+ for (; oup < data.length; oup++) {
+ data[oup] = 0;
+ }
+ }
+ if(off >= step) {
+ // Start positions for in-place downsampling top-down:
+ int oup = (fixpoint - 1 < size) ? fixpoint - 1 : size - 1;
+ int inp = (oup << levels) - off;
+ assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp > -step; inp -= step, oup--) {
+ assert (oup >= inp && oup >= 0);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ for (; oup >= 0; oup--) {
+ data[oup] = 0;
+ }
+ }
+ // recalculate histogram base.
+ base = base - (offset + off) * binsize;
+ offset = 0;
+ size = (size + 1) >> levels;
+ binsize = binsize * (1 << levels);
+ max = base + binsize * size;
+ }
+
+ @Override
+ public Iter iter() {
+ materialize();
+ return super.iter();
+ }
+
+ @Override
+ public int getNumBins() {
+ materialize();
+ return super.getNumBins();
+ }
+
+ @Override
+ public double getBinsize() {
+ materialize();
+ return super.getBinsize();
+ }
+
+ @Override
+ public double getCoverMinimum() {
+ materialize();
+ return super.getCoverMinimum();
+ }
+
+ @Override
+ public double getCoverMaximum() {
+ materialize();
+ return super.getCoverMaximum();
+ }
+
+ /**
+ * Perform downsampling on a number of bins.
+ *
+ * @param data Data array (needs cast!)
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param size Intended size - extra bins are assumed to be empty, should be a
+ * power of two
+ * @return New bin value
+ */
+ protected long downsample(long[] data, int start, int end, int size) {
+ long sum = 0;
+ for (int i = start; i < end; i++) {
+ sum += data[i];
+ }
+ return sum;
+ }
+}
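
For orientation, a minimal usage sketch of the class introduced by this commit (not part of the upstream file; the demo class name, bin count, and data below are made up for illustration):

    import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.LongDynamicHistogram;

    public class HistogramSketch { // hypothetical demo class, not part of ELKI
      public static void main(String[] args) {
        // Request roughly 20 bins; the histogram may use up to twice as many.
        LongDynamicHistogram hist = new LongDynamicHistogram(20);
        for(int i = 0; i < 1000; i++) {
          // Coordinates and counts are buffered until enough data is seen to pick a bin layout.
          hist.increment(Math.random() * 42., 1L);
        }
        // Accessors force materialization (and downsampling, if the covered range grew):
        System.out.println(hist.get(21.));     // count in the bin covering coordinate 21.0
        System.out.println(hist.getNumBins()); // actual number of bins after materialization
        System.out.println(hist.getCoverMinimum() + " .. " + hist.getCoverMaximum());
      }
    }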