package de.lmu.ifi.dbs.elki.math.statistics.distribution; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2015 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.Random; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.Primes; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; /** * Halton sequences are a pseudo-uniform distribution. The data is actually too * regular for a true uniform distribution, but as such will of course often * appear to be uniform. * * Technically, they are based on Van der Corput sequence and the Von Neumann * Katutani transformation. These produce a series of integers which then are * converted to floating point values. * * To randomize, we just choose a random starting position, as indicated by * * Reference: *

* Randomized halton sequences
* Wang, X. and Hickernell, F.J.
* Mathematical and Computer Modelling Vol. 32 (7) *

* * Important note: this code hasn't been double checked yet. While it * probably works for some simple cases such as example data set generation, do * not rely on it for e.g. quasi monte carlo methods without * double-checking the quality, and looking at more advanced methods! * * Let me repeat this: this code was written to generate toy datasets. It * may have deficits for other uses! There is a high chance it will * produce correlated data when used for more than one dimension. - for toy * data sets, try different random seeds until you find one that works for you. * * TODO: find an improved algorithm that takes care of a better randomization, * for example by adding scrambling. * * @author Erich Schubert * @since 0.5.5 */ @Reference(title = "Randomized halton sequences", authors = "Wang, X. and Hickernell, F.J.", booktitle = "Mathematical and Computer Modelling Vol. 32 (7)", url = "http://dx.doi.org/10.1016/S0895-7177(00)00178-3") public class HaltonUniformDistribution implements Distribution { /** * Minimum */ private double min; /** * Maximum */ private double max; /** * Len := max - min */ private double len; /** * Maximum number of iterations of fast variant */ private static final int MAXFAST = 1000; /** * Threshold */ private static final double ALMOST_ONE = 1.0 - 1e-10; /** * Base value */ final short base; /** * Inverse of base, for faster division by multiplication. */ final double invbase; /** * Logarithm of base. */ final double logbase; /** * Maximum integer to use */ final int maxi; /** * Counter, for max iterations of fast function. */ int counter = 0; /** * Current value */ double current; /** * Integer inverse */ long inverse; /** * Constructor for a halton pseudo uniform distribution on the interval [min, * max[ * * @param min Minimum value * @param max Maximum value * @param base Base value * @param seed Random seed (starting value) */ public HaltonUniformDistribution(double min, double max, int base, double seed) { super(); // Swap parameters if they were given incorrectly. if (min > max) { double tmp = min; min = max; max = tmp; } this.min = min; this.max = max; this.len = max - min; this.base = (short) base; this.invbase = 1.0 / base; this.logbase = Math.log(base); // 32 bit * log(2) / log(base) this.maxi = (int) (32.0 * MathUtil.LOG2 / logbase); this.current = seed; this.inverse = inverse(seed); } /** * Constructor for a halton pseudo uniform distribution on the interval [min, * max[ * * @param min Minimum value * @param max Maximum value */ public HaltonUniformDistribution(double min, double max) { this(min, max, new Random()); } /** * Constructor for a halton pseudo uniform distribution on the interval [min, * max[ * * @param min Minimum value * @param max Maximum value * @param rnd Random generator */ public HaltonUniformDistribution(double min, double max, Random rnd) { this(min, max, choosePrime(rnd), rnd.nextDouble()); } /** * Constructor for a halton pseudo uniform distribution on the interval [min, * max[ * * @param min Minimum value * @param max Maximum value * @param rnd Random generator */ public HaltonUniformDistribution(double min, double max, RandomFactory rnd) { this(min, max, rnd.getRandom()); } /** * Choose a random prime. We try to avoid the later primes, as they are known * to cause too correlated data. * * @param rnd Random generator * @return Prime */ private static int choosePrime(Random rnd) { return Primes.FIRST_PRIMES[rnd.nextInt(10)]; } @Override public double pdf(double val) { if (val < min || val >= max) { return 0.0; } return 1.0 / len; } @Override public double cdf(double val) { if (val < min) { return 0.0; } if (val > max) { return 1.0; } return (val - min) / len; } @Override public double quantile(double val) { return min + len * val; } /** * Compute the inverse with respect to the given base. * * @param current Current value * @return Integer inverse */ private long inverse(double current) { // Represent to base b. short[] digits = new short[maxi]; int j; for (j = 0; j < maxi; j++) { current *= base; digits[j] = (short) current; current -= digits[j]; if (current <= 1e-10) { break; } } long inv = 0; for (j = maxi - 1; j >= 0; j--) { inv = inv * base + digits[j]; } return inv; } /** * Compute the radical inverse of i. * * @param i Input long value * @return Double radical inverse */ private double radicalInverse(long i) { double digit = 1.0 / (double) base; double radical = digit; double inverse = 0.0; while (i > 0) { inverse += digit * (double) (i % base); digit *= radical; i /= base; } return inverse; } /** * Compute the next radical inverse. * * @return Next inverse */ private double nextRadicalInverse() { counter++; // Do at most MAXFAST appromate steps if (counter >= MAXFAST) { counter = 0; inverse += MAXFAST; current = radicalInverse(inverse); return current; } // Fast approximation: double nextInverse = current + invbase; if (nextInverse < ALMOST_ONE) { current = nextInverse; return current; } else { double digit1 = invbase, digit2 = invbase * invbase; while (current + digit2 >= ALMOST_ONE) { digit1 = digit2; digit2 *= invbase; } current += (digit1 - 1.0) + digit2; return current; } } @Override public double nextRandom() { return min + nextRadicalInverse() * len; } @Override public String toString() { return "HaltonUniformDistribution(min=" + min + ", max=" + max + ")"; } /** * @return the minimum value */ public double getMin() { return min; } /** * @return the maximum value */ public double getMax() { return max; } /** * Parameterization class * * TODO: allow manual parameterization of sequence parameters! * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractDistribution.Parameterizer { /** Parameters. */ double min, max; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); DoubleParameter minP = new DoubleParameter(UniformDistribution.Parameterizer.MIN_ID); if (config.grab(minP)) { min = minP.doubleValue(); } DoubleParameter maxP = new DoubleParameter(UniformDistribution.Parameterizer.MAX_ID); if (config.grab(maxP)) { max = maxP.doubleValue(); } } @Override protected HaltonUniformDistribution makeInstance() { return new HaltonUniformDistribution(min, max, rnd); } } }