diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/statistics/distribution')
9 files changed, 459 insertions, 20 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java index 84c86e98..5dc5b399 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java @@ -91,7 +91,7 @@ public class ChiDistribution implements DistributionWithRandom { * @return CDF value */ public static double cdf(double val, double dof) { - return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2); + return GammaDistribution.regularizedGammaP(dof * .5, val * val * .5); } // FIXME: implement! diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java index 8555afd3..efa24079 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java @@ -65,8 +65,8 @@ public class ChiSquaredDistribution extends GammaDistribution { if(x == 0) { return 0.0; } - final double k = dof / 2; - if(k == 1.0) { + final double k = dof * .5; + if(Math.abs(k - 1.0) < Double.MIN_NORMAL) { return Math.exp(-x * 2.0) * 2.0; } return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0; diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java index 5b6cd286..ad4ef944 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java @@ -37,7 +37,7 @@ public interface Distribution { * @param val existing value * @return distribution density */ - public double pdf(double val); + double pdf(double val); /** * Return the cumulative density function at the given value. @@ -45,7 +45,7 @@ public interface Distribution { * @param val existing value * @return cumulative density */ - public double cdf(double val); + double cdf(double val); /** * Quantile aka probit (for normal) aka inverse CDF (invcdf, cdf^-1) function. @@ -53,7 +53,7 @@ public interface Distribution { * @param val Quantile to find * @return Quantile position */ - public double quantile(double val); + double quantile(double val); /** * Describe the distribution @@ -61,5 +61,5 @@ public interface Distribution { * @return description */ @Override - public String toString(); -} + String toString(); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java index af272528..02f5002f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java @@ -33,5 +33,5 @@ public interface DistributionWithRandom extends Distribution { * * @return new random value */ - public double nextRandom(); -} + double nextRandom(); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java new file mode 100644 index 00000000..866f40d6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java @@ -0,0 +1,126 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Exponential distribution. + * + * @author Erich Schubert + */ +public class ExponentialDistribution implements DistributionWithRandom { + /** + * Random generator. + */ + Random rnd; + + /** + * Rate, inverse of mean + */ + double rate; + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + */ + public ExponentialDistribution(double rate) { + this(rate, new Random()); + } + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + * @param random Random generator + */ + public ExponentialDistribution(double rate, Random random) { + super(); + this.rate = rate; + this.rnd = random; + } + + @Override + public double pdf(double val) { + return rate * Math.exp(-rate * val); + } + + /** + * PDF, static version + * + * @param val Value to compute PDF at + * @param rate Rate parameter (1/scale) + * @return probability density + */ + public static double pdf(double val, double rate) { + return rate * Math.exp(-rate * val); + } + + @Override + public double cdf(double val) { + return 1 - Math.exp(-rate * val); + } + + /** + * Cumulative density, static version + * + * @param val Value to compute CDF at + * @param rate Rate parameter (1/scale) + * @return cumulative density + */ + public static double cdf(double val, double rate) { + return 1 - Math.exp(-rate * val); + } + + @Override + public double quantile(double val) { + return -Math.log(1 - val) / rate; + } + + /** + * Quantile function, static version + * + * @param val Value to compute quantile for + * @param rate Rate parameter + * @return Quantile + */ + public static double quantile(double val, double rate) { + return -Math.log(1 - val) / rate; + } + + /** + * This method currently uses the naive approach of returning + * <code>-log(uniform)</code>. + * + * TODO: there are variants that do not rely on the log method and are faster. + * We need to implement and evaluate these. For details: see + * "Computer methods for sampling from the exponential and normal distributions" + * J. H. Ahrens, U. Dieter, https://dl.acm.org/citation.cfm?id=361593 + */ + @Override + public double nextRandom() { + return -Math.log(rnd.nextDouble()) / rate; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java new file mode 100644 index 00000000..df8fecda --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java @@ -0,0 +1,313 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.Primes; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Halton sequences are a pseudo-uniform distribution. The data is actually too + * regular for a true uniform distribution, but as such will of course often + * appear to be uniform. + * + * Technically, they are based on Van der Corput sequence and the Von Neumann + * Katutani transformation. These produce a series of integers which then are + * converted to floating point values. + * + * To randomize, we just choose a random starting position, as indicated by + * + * Reference: + * <p> + * Randomized halton sequences<br> + * Wang, X. and Hickernell, F.J.<br /> + * Mathematical and Computer Modelling Vol. 32 (7) + * </p> + * + * <b>Important note: this code hasn't been double checked yet. While it + * probably works for some simple cases such as example data set generation, do + * <em>not</em> rely on it for e.g. quasi monte carlo methods without + * double-checking the quality, and looking at more advanced methods!</b> + * + * Let me repeat this: this code was written <b>to generate toy datasets</b>. It + * <b>may have deficits</b> for other uses! <b>There is a high chance it will + * produce correlated data when used for more than one dimension.</b> - for toy + * data sets, try different random seeds until you find one that works for you. + * + * TODO: find an improved algorithm that takes care of a better randomization, + * for example by adding scrambling. + * + * @author Erich Schubert + */ +@Reference(title = "Randomized halton sequences", authors = "Wang, X. and Hickernell, F.J.", booktitle = "Mathematical and Computer Modelling Vol. 32 (7)", url = "http://dx.doi.org/10.1016/S0895-7177(00)00178-3") +public class HaltonUniformDistribution implements DistributionWithRandom { + /** + * Minimum + */ + private double min; + + /** + * Maximum + */ + private double max; + + /** + * Len := max - min + */ + private double len; + + /** + * Maximum number of iterations of fast variant + */ + private static final int MAXFAST = 1000; + + /** + * Threshold + */ + private static final double ALMOST_ONE = 1.0 - 1e-10; + + /** + * Base value + */ + final short base; + + /** + * Inverse of base, for faster division by multiplication. + */ + final double invbase; + + /** + * Logarithm of base. + */ + final double logbase; + + /** + * Maximum integer to use + */ + final int maxi; + + /** + * Counter, for max iterations of fast function. + */ + int counter = 0; + + /** + * Current value + */ + double current; + + /** + * Integer inverse + */ + long inverse; + + /** + * Constructor for a halton pseudo uniform distribution on the interval [min, + * max[ + * + * @param min Minimum value + * @param max Maximum value + * @param base Base value + * @param seed Random seed (starting value) + */ + public HaltonUniformDistribution(double min, double max, int base, double seed) { + super(); + // Swap parameters if they were given incorrectly. + if (min > max) { + double tmp = min; + min = max; + max = tmp; + } + this.min = min; + this.max = max; + this.len = max - min; + + this.base = (short) base; + this.invbase = 1.0 / base; + this.logbase = Math.log(base); + // 32 bit * log(2) / log(base) + this.maxi = (int) (32.0 * MathUtil.LOG2 / logbase); + this.current = seed; + this.inverse = inverse(seed); + } + + /** + * Constructor for a halton pseudo uniform distribution on the interval [min, + * max[ + * + * @param min Minimum value + * @param max Maximum value + */ + public HaltonUniformDistribution(double min, double max) { + // TODO: use different starting primes? + this(min, max, new Random()); + } + + /** + * Constructor for a halton pseudo uniform distribution on the interval [min, + * max[ + * + * @param min Minimum value + * @param max Maximum value + * @param rnd Random generator + */ + public HaltonUniformDistribution(double min, double max, Random rnd) { + // TODO: use different starting primes? + this(min, max, choosePrime(rnd), rnd.nextDouble()); + } + + /** + * Choose a random prime. We try to avoid the later primes, as they are known + * to cause too correlated data. + * + * @param rnd Random generator + * @return Prime + */ + private static int choosePrime(Random rnd) { + return Primes.FIRST_PRIMES[rnd.nextInt(10)]; + } + + @Override + public double pdf(double val) { + if (val < min || val >= max) { + return 0.0; + } + return 1.0 / len; + } + + @Override + public double cdf(double val) { + if (val < min) { + return 0.0; + } + if (val > max) { + return 1.0; + } + return (val - min) / len; + } + + @Override + public double quantile(double val) { + return min + len * val; + } + + /** + * Compute the inverse with respect to the given base. + * + * @param current Current value + * @return Integer inverse + */ + private long inverse(double current) { + // Represent to base b. + short[] digits = new short[maxi]; + int j; + for (j = 0; j < maxi; j++) { + current *= base; + digits[j] = (short) current; + current -= digits[j]; + if (current <= 1e-10) { + break; + } + } + long inv = 0; + for (j = maxi - 1; j >= 0; j--) { + inv = inv * base + digits[j]; + } + return inv; + } + + /** + * Compute the radical inverse of i. + * + * @param i Input long value + * @return Double radical inverse + */ + private double radicalInverse(long i) { + double digit = 1.0 / (double) base; + double radical = digit; + double inverse = 0.0; + while (i > 0) { + inverse += digit * (double) (i % base); + digit *= radical; + i /= base; + } + return inverse; + } + + /** + * Compute the next radical inverse. + * + * @return Next inverse + */ + private double nextRadicalInverse() { + counter++; + // Do at most MAXFAST appromate steps + if (counter >= MAXFAST) { + counter = 0; + inverse += MAXFAST; + current = radicalInverse(inverse); + return current; + } + // Fast approximation: + double nextInverse = current + invbase; + if (nextInverse < ALMOST_ONE) { + current = nextInverse; + return current; + } else { + double digit1 = invbase, digit2 = invbase * invbase; + while (current + digit2 >= ALMOST_ONE) { + digit1 = digit2; + digit2 *= invbase; + } + current += (digit1 - 1.0) + digit2; + return current; + } + } + + @Override + public double nextRandom() { + return min + nextRadicalInverse() * len; + } + + @Override + public String toString() { + return "HaltonUniformDistribution(min=" + min + ", max=" + max + ")"; + } + + /** + * @return the minimum value + */ + public double getMin() { + return min; + } + + /** + * @return the maximum value + */ + public double getMax() { + return max; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java index 9180b59e..1845dec1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java @@ -315,7 +315,7 @@ public class NormalDistribution implements DistributionWithRandom { public static double pdf(double x, double mu, double sigma) { final double x_mu = x - mu; final double sigmasq = sigma * sigma; - return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-1 * x_mu * x_mu / 2 / sigmasq); + return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-.5 * x_mu * x_mu / sigmasq); } /** diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java index 53fb0dc8..a4ea9402 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java @@ -53,26 +53,26 @@ public class PoissonDistribution implements Distribution { private double p; /** Stirling error constants: 1./12 */ - private final static double S0 = 0.08333333333333333333333d; + private static final double S0 = 0.08333333333333333333333d; /** Stirling error constants: 1./360 */ - private final static double S1 = 0.0027777777777777777777777777778d; + private static final double S1 = 0.0027777777777777777777777777778d; /** Stirling error constants: 1./1260 */ - private final static double S2 = 0.00079365079365079365079365d; + private static final double S2 = 0.00079365079365079365079365d; /** Stirling error constants: 1./1680 */ - private final static double S3 = 0.000595238095238095238095238095d; + private static final double S3 = 0.000595238095238095238095238095d; /** Stirling error constants: 1./1188 */ - private final static double S4 = 0.00084175084175084175084175084175d; + private static final double S4 = 0.00084175084175084175084175084175d; /** * Exact table values for n <= 15 in steps of 0.5 * * sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) ) */ - private final static double STIRLING_EXACT_ERROR[] = {// + private static final double STIRLING_EXACT_ERROR[] = {// 0.0, // 0.0 0.1534264097200273452913848, // 0.5 0.0810614667953272582196702, // 1.0 @@ -273,7 +273,7 @@ public class PoissonDistribution implements Distribution { private static double stirlingError(int n) { // Try to use a table value: if(n < 16) { - return STIRLING_EXACT_ERROR[n * 2]; + return STIRLING_EXACT_ERROR[n << 1]; } final double nn = n * n; // Use the appropriate number of terms diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java index 2e9e0d15..fcb96c12 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java @@ -73,7 +73,7 @@ public class StudentsTDistribution implements Distribution { */
public static double pdf(double val, int v) {
// TODO: improve precision by computing "exp" last?
- return Math.exp(GammaDistribution.logGamma((v + 1) / 2) - GammaDistribution.logGamma(v / 2)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) / 2));
+ return Math.exp(GammaDistribution.logGamma((v + 1) * .5) - GammaDistribution.logGamma(v * .5)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) * .5));
}
/**
@@ -85,6 +85,6 @@ public class StudentsTDistribution implements Distribution { */
public static double cdf(double val, int v) {
double x = v / (val * val + v);
- return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v / 2, 0.5));
+ return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v * .5, 0.5));
}
}
\ No newline at end of file |