diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/statistics/distribution')
8 files changed, 1253 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java new file mode 100644 index 00000000..c561c4cd --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java @@ -0,0 +1,92 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Chi distribution. + * + * @author Erich Schubert + */ +public class ChiDistribution implements Distribution { + /** + * Degrees of freedom. Usually integer. + */ + private double dof; + + /** + * Chi squared distribution (for random generation) + */ + private ChiSquaredDistribution chisq; + + /** + * Constructor. + * + * @param dof Degrees of freedom. Usually integer. + */ + public ChiDistribution(double dof) { + super(); + this.dof = dof; + this.chisq = new ChiSquaredDistribution(dof); + } + + @Override + public double nextRandom() { + return Math.sqrt(chisq.nextRandom()); + } + + @Override + public double pdf(double val) { + return pdf(val, dof); + } + + /** + * PDF function + * + * @param val Value + * @param dof Degrees of freedom + * @return Pdf value + */ + public static double pdf(double val, double dof) { + if(val < 0) { + return 0.0; + } + return Math.sqrt(ChiSquaredDistribution.pdf(val, dof)); + } + + @Override + public double cdf(double val) { + return cdf(val, dof); + } + + /** + * Cumulative density function. + * + * @param val Value + * @param dof Degrees of freedom. + * @return CDF value + */ + public static double cdf(double val, double dof) { + return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java new file mode 100644 index 00000000..0ab39c78 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java @@ -0,0 +1,72 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Chi-Squared distribution (a specialization of the Gamma distribution). + * + * @author Erich Schubert + */ +public class ChiSquaredDistribution extends GammaDistribution { + /** + * Constructor. + * + * @param dof Degrees of freedom. + */ + public ChiSquaredDistribution(double dof) { + super(.5 * dof, 2.0); + } + + /** + * The CDF, static version. + * + * @param val Value + * @param dof Degrees of freedom. + * @return cdf value + */ + public static double cdf(double val, double dof) { + return regularizedGammaP(.5 * dof, .5 * val); + } + + /** + * Chi-Squared distribution PDF (with 0.0 for x < 0) + * + * @param x query value + * @param dof Degrees of freedom. + * @return probability density + */ + public static double pdf(double x, double dof) { + if(x <= 0) { + return 0.0; + } + if(x == 0) { + return 0.0; + } + final double k = dof / 2; + if(k == 1.0) { + return Math.exp(-x * 2.0) * 2.0; + } + return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java new file mode 100644 index 00000000..1f36dd4a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java @@ -0,0 +1,61 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Pseudo distribution, that has a unique constant value. + * + * @author Erich Schubert + */ +public class ConstantDistribution implements Distribution { + /** + * The constant + */ + final double c; + + /** + * Constructor. + * + * @param c Constant + */ + public ConstantDistribution(double c) { + super(); + this.c = c; + } + + @Override + public double nextRandom() { + return c; + } + + @Override + public double pdf(double val) { + return (val == c) ? 1 : 0; + } + + @Override + public double cdf(double val) { + return (val >= c) ? 1.0 : 0.0; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java new file mode 100644 index 00000000..290e6434 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java @@ -0,0 +1,63 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Interface for a simple distribution generator with a PDF, i.e. it can also + * compute a density + * + * @author Erich Schubert + */ +public interface Distribution { + /** + * Generate a new random value + * + * @return new random value + */ + public double nextRandom(); + + /** + * Return the density of an existing value + * + * @param val existing value + * @return distribution density + */ + public double pdf(double val); + + /** + * Return the cumulative density function at the given value. + * + * @param val existing value + * @return cumulative density + */ + public double cdf(double val); + + /** + * Describe the distribution + * + * @return description + */ + @Override + public String toString(); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java new file mode 100644 index 00000000..6830f25a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java @@ -0,0 +1,470 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Gamma Distribution, with random generation and density functions. + * + * @author Erich Schubert + */ +public class GammaDistribution implements Distribution { + /** + * LANCZOS-Coefficients for Gamma approximation. + * + * These are said to have higher precision than those in "Numerical Recipes". + * They probably come from + * + * Paul Godfrey: http://my.fit.edu/~gabdo/gamma.txt + */ + static final double[] LANCZOS = { 0.99999999999999709182, 57.156235665862923517, -59.597960355475491248, 14.136097974741747174, -0.49191381609762019978, .33994649984811888699e-4, .46523628927048575665e-4, -.98374475304879564677e-4, .15808870322491248884e-3, -.21026444172410488319e-3, .21743961811521264320e-3, -.16431810653676389022e-3, .84418223983852743293e-4, -.26190838401581408670e-4, .36899182659531622704e-5, }; + + /** + * Numerical precision to use + */ + static final double NUM_PRECISION = 1E-15; + + /** + * Alpha == k + */ + private final double k; + + /** + * Theta == 1 / Beta + */ + private final double theta; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @param random Random generator + */ + public GammaDistribution(double k, double theta, Random random) { + super(); + if(k <= 0.0 || theta <= 0.0) { + throw new IllegalArgumentException("Invalid parameters for Gamma distribution."); + } + + this.k = k; + this.theta = theta; + this.random = random; + } + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + */ + public GammaDistribution(double k, double theta) { + this(k, theta, new Random()); + } + + @Override + public double pdf(double val) { + return pdf(val, k, theta); + } + + @Override + public double cdf(double val) { + return cdf(val, k, theta); + } + + @Override + public double nextRandom() { + return nextRandom(k, theta, random); + } + + /** + * Simple toString explaining the distribution parameters. + * + * Used in producing a model description. + */ + @Override + public String toString() { + return "Gamma Distribution (k=" + k + ", theta=" + theta + ")"; + } + + /** + * @return the value of k + */ + public double getK() { + return k; + } + + /** + * @return the standard deviation + */ + public double getTheta() { + return theta; + } + + /** + * The CDF, static version. + * + * @param val Value + * @param k Shape k + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @return cdf value + */ + public static double cdf(double val, double k, double theta) { + return regularizedGammaP(k, val / theta); + } + + /** + * Gamma distribution PDF (with 0.0 for x < 0) + * + * @param x query value + * @param k Alpha + * @param theta Theta = 1 / Beta + * @return probability density + */ + public static double pdf(double x, double k, double theta) { + if(x < 0) { + return 0.0; + } + if(x == 0) { + if(k == 1.0) { + return theta; + } + else { + return 0.0; + } + } + if(k == 1.0) { + return Math.exp(-x * theta) * theta; + } + + return Math.exp((k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k)) * theta; + } + + /** + * Compute logGamma. + * + * Based loosely on "Numerical Recpies" and the work of Paul Godfrey at + * http://my.fit.edu/~gabdo/gamma.txt + * + * TODO: find out which approximation really is the best... + * + * @param x Parameter x + * @return log(Γ(x)) + */ + public static double logGamma(final double x) { + if(Double.isNaN(x) || (x <= 0.0)) { + return Double.NaN; + } + double g = 607.0 / 128.0; + double tmp = x + g + .5; + tmp = (x + 0.5) * Math.log(tmp) - tmp; + double ser = LANCZOS[0]; + for(int i = LANCZOS.length - 1; i > 0; --i) { + ser += LANCZOS[i] / (x + i); + } + return tmp + Math.log(MathUtil.SQRTTWOPI * ser / x); + } + + /** + * Returns the regularized gamma function P(a, x). + * + * Includes the quadrature way of computing. + * + * TODO: find "the" most accurate version of this. We seem to agree with + * others for the first 10+ digits, but diverge a bit later than that. + * + * @param a Parameter a + * @param x Parameter x + * @return Gamma value + */ + public static double regularizedGammaP(final double a, final double x) { + // Special cases + if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + return Double.NaN; + } + if(x == 0.0) { + return 0.0; + } + if(x >= a + 1) { + // Expected to converge faster + return 1.0 - regularizedGammaQ(a, x); + } + // Loosely following "Numerical Recipes" + double del = 1.0 / a; + double sum = del; + for(int n = 1; n < Integer.MAX_VALUE; n++) { + // compute next element in the series + del *= x / (a + n); + sum = sum + del; + if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) { + break; + } + } + if(Double.isInfinite(sum)) { + return 1.0; + } + return Math.exp(-x + (a * Math.log(x)) - logGamma(a)) * sum; + } + + /** + * Returns the regularized gamma function Q(a, x) = 1 - P(a, x). + * + * Includes the continued fraction way of computing, based loosely on the book + * "Numerical Recipes"; but probably not with the exactly same precision, + * since we reimplemented this in our coding style, not literally. + * + * TODO: find "the" most accurate version of this. We seem to agree with + * others for the first 10+ digits, but diverge a bit later than that. + * + * @param a parameter a + * @param x parameter x + * @return Result + */ + public static double regularizedGammaQ(final double a, final double x) { + if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + return Double.NaN; + } + if(x == 0.0) { + return 1.0; + } + if(x < a + 1.0) { + // Expected to converge faster + return 1.0 - regularizedGammaP(a, x); + } + // Compute using continued fraction approach. + final double FPMIN = Double.MIN_VALUE / NUM_PRECISION; + double b = x + 1 - a; + double c = 1.0 / FPMIN; + double d = 1.0 / b; + double fac = d; + for(int i = 1; i < Integer.MAX_VALUE; i++) { + double an = i * (a - i); + b += 2; + d = an * d + b; + if(Math.abs(d) < FPMIN) { + d = FPMIN; + } + c = b + an / c; + if(Math.abs(c) < FPMIN) { + c = FPMIN; + } + d = 1 / d; + double del = d * c; + fac *= del; + if(Math.abs(del - 1.0) <= NUM_PRECISION) { + break; + } + } + return fac * Math.exp(-x + a * Math.log(x) - logGamma(a)); + } + + /** + * Generate a random value with the generators parameters. + * + * Along the lines of + * + * - J.H. Ahrens, U. Dieter (1974): Computer methods for sampling from gamma, + * beta, Poisson and binomial distributions, Computing 12, 223-246. + * + * - J.H. Ahrens, U. Dieter (1982): Generating gamma variates by a modified + * rejection technique, Communications of the ACM 25, 47-54. + * + * @param k K parameter + * @param theta Theta parameter + * @param random Random generator + */ + public static double nextRandom(double k, double theta, Random random) { + /* Constants */ + final double q1 = 0.0416666664, q2 = 0.0208333723, q3 = 0.0079849875; + final double q4 = 0.0015746717, q5 = -0.0003349403, q6 = 0.0003340332; + final double q7 = 0.0006053049, q8 = -0.0004701849, q9 = 0.0001710320; + final double a1 = 0.333333333, a2 = -0.249999949, a3 = 0.199999867; + final double a4 = -0.166677482, a5 = 0.142873973, a6 = -0.124385581; + final double a7 = 0.110368310, a8 = -0.112750886, a9 = 0.104089866; + final double e1 = 1.000000000, e2 = 0.499999994, e3 = 0.166666848; + final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826; + final double e7 = 0.000247453; + + if(k < 1.0) { // Base case, for small k + final double b = 1.0 + 0.36788794412 * k; // Step 1 + while(true) { + final double p = b * random.nextDouble(); + if(p <= 1.0) { // when gds <= 1 + final double gds = Math.exp(Math.log(p) / k); + if(Math.log(random.nextDouble()) <= -gds) { + return (gds / theta); + } + } + else { // when gds > 1 + final double gds = -Math.log((b - p) / k); + if(Math.log(random.nextDouble()) <= ((k - 1.0) * Math.log(gds))) { + return (gds / theta); + } + } + } + } + else { + // Step 1. Preparations + final double ss, s, d; + if(k != -1.0) { + ss = k - 0.5; + s = Math.sqrt(ss); + d = 5.656854249 - 12.0 * s; + } + else { + // For k == -1.0: + ss = 0.0; + s = 0.0; + d = 0.0; + } + // Random vector of maximum length 1 + final double v1, /* v2, */v12; + { // Temporary values - candidate + double tv1, tv2, tv12; + do { + tv1 = 2.0 * random.nextDouble() - 1.0; + tv2 = 2.0 * random.nextDouble() - 1.0; + tv12 = tv1 * tv1 + tv2 * tv2; + } + while(tv12 > 1.0); + v1 = tv1; + /* v2 = tv2; */ + v12 = tv12; + } + + // double b = 0.0, c = 0.0; + // double si = 0.0, q0 = 0.0; + final double b, c, si, q0; + + // Simpler accept cases & parameter computation + { + final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12); + final double x = s + 0.5 * t; + final double gds = x * x; + if(t >= 0.0) { + return (gds / theta); // Immediate acceptance + } + + // Random uniform + final double un = random.nextDouble(); + // Squeeze acceptance + if(d * un <= t * t * t) { + return (gds / theta); + } + + if(k != -1.0) { // Step 4. Set-up for hat case + final double r = 1.0 / k; + q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r; + if(k > 3.686) { + if(k > 13.022) { + b = 1.77; + si = 0.75; + c = 0.1515 / s; + } + else { + b = 1.654 + 0.0076 * ss; + si = 1.68 / s + 0.275; + c = 0.062 / s + 0.024; + } + } + else { + b = 0.463 + s - 0.178 * ss; + si = 1.235; + c = 0.195 / s - 0.079 + 0.016 * s; + } + } + else { + // For k == -1.0: + b = 0.0; + c = 0.0; + si = 0.0; + q0 = 0.0; + } + // Compute v and q + if(x > 0.0) { + final double v = t / (s + s); + final double q; + if(Math.abs(v) > 0.25) { + q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v); + } + else { + q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; + } + // Quotient acceptance: + if(Math.log(1.0 - un) <= q) { + return (gds / theta); + } + } + } + + // Double exponential deviate t + while(true) { + double e, u, sign_u, t; + // Retry until t is sufficiently large + do { + e = -Math.log(random.nextDouble()); + u = random.nextDouble(); + u = u + u - 1.0; + sign_u = (u > 0) ? 1.0 : -1.0; + t = b + (e * si) * sign_u; + } + while(t <= -0.71874483771719); + + // New v(t) and q(t) + final double v = t / (s + s); + final double q; + if(Math.abs(v) > 0.25) { + q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v); + } + else { + q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; + } + if(q <= 0.0) { + continue; // retry + } + // Compute w(t) + final double w; + if(q > 0.5) { + w = Math.exp(q) - 1.0; + } + else { + w = ((((((e7 * q + e6) * q + e5) * q + e4) * q + e3) * q + e2) * q + e1) * q; + } + // Hat acceptance + if(c * u * sign_u <= w * Math.exp(e - 0.5 * t * t)) { + final double x = s + 0.5 * t; + return (x * x / theta); + } + } + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java new file mode 100644 index 00000000..919cc2e3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java @@ -0,0 +1,337 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Gaussian distribution aka normal distribution + * + * @author Erich Schubert + */ +public class NormalDistribution implements Distribution { + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_A[] = { 1.85777706184603153e-1, 3.16112374387056560e+0, 1.13864154151050156E+2, 3.77485237685302021e+2, 3.20937758913846947e+3 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_B[] = { 1.00000000000000000e00, 2.36012909523441209e01, 2.44024637934444173e02, 1.28261652607737228e03, 2.84423683343917062e03 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_C[] = { 2.15311535474403846e-8, 5.64188496988670089e-1, 8.88314979438837594e00, 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02, 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725E03 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_D[] = { 1.00000000000000000e00, 1.57449261107098347e01, 1.17693950891312499e02, 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03, 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_P[] = { 1.63153871373020978e-2, 3.05326634961232344e-1, 3.60344899949804439e-1, 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_Q[] = { 1.00000000000000000e00, 2.56852019228982242e00, 1.87295284992346047e00, 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3 }; + + /** + * Treshold for switching nethods for erfinv approximation + */ + static final double P_LOW = 0.02425D; + + /** + * Treshold for switching nethods for erfinv approximation + */ + static final double P_HIGH = 1.0D - P_LOW; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_A[] = { -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 }; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_B[] = { -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 }; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_C[] = { -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 }; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_D[] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 }; + + /** + * Mean value for the generator + */ + private double mean; + + /** + * Standard deviation + */ + private double stddev; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for Gaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + * @param random Random generator + */ + public NormalDistribution(double mean, double stddev, Random random) { + super(); + this.mean = mean; + this.stddev = stddev; + this.random = random; + } + + /** + * Constructor for Gaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + */ + public NormalDistribution(double mean, double stddev) { + this(mean, stddev, new Random()); + } + + @Override + public double pdf(double val) { + return pdf(val, mean, stddev); + } + + @Override + public double cdf(double val) { + return cdf(val, mean, stddev); + } + + @Override + public double nextRandom() { + return mean + random.nextGaussian() * stddev; + } + + @Override + public String toString() { + return "Normal Distribution (mean="+mean+", stddev="+stddev+")"; + } + + /** + * @return the mean + */ + public double getMean() { + return mean; + } + + /** + * @return the standard deviation + */ + public double getStddev() { + return stddev; + } + + /** + * Complementary error function for Gaussian distributions = Normal + * distributions. + * + * Numerical approximation using taylor series. Implementation loosely based + * on http://www.netlib.org/specfun/erf + * + * @param x parameter value + * @return erfc(x) + */ + public static double erfc(double x) { + if(Double.isNaN(x)) { + return Double.NaN; + } + if(Double.isInfinite(x)) { + return (x < 0.0) ? 2 : 0; + } + + double result = Double.NaN; + double absx = Math.abs(x); + // First approximation interval + if(absx < 0.46875) { + double z = x * x; + result = 1 - x * ((((ERFAPP_A[0] * z + ERFAPP_A[1]) * z + ERFAPP_A[2]) * z + ERFAPP_A[3]) * z + ERFAPP_A[4]) / ((((ERFAPP_B[0] * z + ERFAPP_B[1]) * z + ERFAPP_B[2]) * z + ERFAPP_B[3]) * z + ERFAPP_B[4]); + } + // Second approximation interval + else if(absx < 4.0) { + double z = absx; + result = ((((((((ERFAPP_C[0] * z + ERFAPP_C[1]) * z + ERFAPP_C[2]) * z + ERFAPP_C[3]) * z + ERFAPP_C[4]) * z + ERFAPP_C[5]) * z + ERFAPP_C[6]) * z + ERFAPP_C[7]) * z + ERFAPP_C[8]) / ((((((((ERFAPP_D[0] * z + ERFAPP_D[1]) * z + ERFAPP_D[2]) * z + ERFAPP_D[3]) * z + ERFAPP_D[4]) * z + ERFAPP_D[5]) * z + ERFAPP_D[6]) * z + ERFAPP_D[7]) * z + ERFAPP_D[8]); + double rounded = Math.round(result * 16.0) / 16.0; + double del = (absx - rounded) * (absx + rounded); + result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; + if(x < 0.0) { + result = 2.0 - result; + } + } + // Third approximation interval + else { + double z = 1.0 / (absx * absx); + result = z * (((((ERFAPP_P[0] * z + ERFAPP_P[1]) * z + ERFAPP_P[2]) * z + ERFAPP_P[3]) * z + ERFAPP_P[4]) * z + ERFAPP_P[5]) / (((((ERFAPP_Q[0] * z + ERFAPP_Q[1]) * z + ERFAPP_Q[2]) * z + ERFAPP_Q[3]) * z + ERFAPP_Q[4]) * z + ERFAPP_Q[5]); + result = (MathUtil.ONE_BY_SQRTPI - result) / absx; + double rounded = Math.round(result * 16.0) / 16.0; + double del = (absx - rounded) * (absx + rounded); + result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; + if(x < 0.0) { + result = 2.0 - result; + } + } + return result; + } + + /** + * Error function for Gaussian distributions = Normal distributions. + * + * Numerical approximation using taylor series. Implementation loosely based + * on http://www.netlib.org/specfun/erf + * + * @param x parameter value + * @return erf(x) + */ + public static double erf(double x) { + return 1 - erfc(x); + } + + /** + * Inverse error function. + * + * @param x parameter value + * @return erfinv(x) + */ + public static double erfinv(double x) { + return standardNormalProbit(0.5 * (x + 1)) / MathUtil.SQRT2; + } + + /** + * Approximate the inverse error function for normal distributions. + * + * Largely based on: + * <p> + * http://www.math.uio.no/~jacklam/notes/invnorm/index.html <br> + * by Peter John Acklam + * </p> + * + * @param d Quantile. Must be in [0:1], obviously. + * @return Inverse erf. + */ + public static double standardNormalProbit(double d) { + if(d == 0) { + return Double.NEGATIVE_INFINITY; + } + else if(d == 1) { + return Double.POSITIVE_INFINITY; + } + else if(Double.isNaN(d) || d < 0 || d > 1) { + return Double.NaN; + } + else if(d < P_LOW) { + // Rational approximation for lower region: + double q = Math.sqrt(-2 * Math.log(d)); + return (((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); + } + else if(P_HIGH < d) { + // Rational approximation for upper region: + double q = Math.sqrt(-2 * Math.log(1 - d)); + return -(((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); + } + else { + // Rational approximation for central region: + double q = d - 0.5D; + double r = q * q; + return (((((ERFINV_A[0] * r + ERFINV_A[1]) * r + ERFINV_A[2]) * r + ERFINV_A[3]) * r + ERFINV_A[4]) * r + ERFINV_A[5]) * q / (((((ERFINV_B[0] * r + ERFINV_B[1]) * r + ERFINV_B[2]) * r + ERFINV_B[3]) * r + ERFINV_B[4]) * r + 1); + } + } + + /** + * Probability density function of the normal distribution. + * + * <pre> + * 1/(SQRT(2*pi*sigma^2)) * e^(-(x-mu)^2/2sigma^2) + * </pre> + * + * @param x The value. + * @param mu The mean. + * @param sigma The standard deviation. + * @return PDF of the given normal distribution at x. + */ + public static double pdf(double x, double mu, double sigma) { + final double x_mu = x - mu; + final double sigmasq = sigma * sigma; + return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-1 * x_mu * x_mu / 2 / sigmasq); + } + + /** + * Cumulative probability density function (CDF) of a normal distribution. + * + * @param x value to evaluate CDF at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The CDF of the normal given distribution at x. + */ + public static double cdf(double x, double mu, double sigma) { + return (1 + erf(x / Math.sqrt(2))) / 2; + } + + /** + * Inverse cumulative probability density function (probit) of a normal + * distribution. + * + * @param x value to evaluate probit function at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The probit of the normal given distribution at x. + */ + public static double probit(double x, double mu, double sigma) { + return mu + sigma * standardNormalProbit(x); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java new file mode 100644 index 00000000..9571cfd3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java @@ -0,0 +1,132 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Uniform distribution. + * + * @author Erich Schubert + */ +public class UniformDistribution implements Distribution { + /** + * Minimum + */ + private double min; + + /** + * Maximum + */ + private double max; + + /** + * Len := max - min + */ + private double len; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for a uniform distribution on the interval [min, max[ + * + * @param min Minimum value + * @param max Maximum value + * @param random Random generator + */ + public UniformDistribution(double min, double max, Random random) { + super(); + // Swap parameters if they were given incorrectly. + if(min > max) { + double tmp = min; + min = max; + max = tmp; + } + this.min = min; + this.max = max; + this.len = max - min; + this.random = random; + } + + /** + * Constructor for a uniform distribution on the interval [min, max[ + * + * @param min Minimum value + * @param max Maximum value + */ + public UniformDistribution(double min, double max) { + this(min, max, new Random()); + } + + @Override + public double pdf(double val) { + if(val < min || val >= max) { + return 0.0; + } + return 1.0 / len; + } + + @Override + public double cdf(double val) { + if(val < min) { + return 0.0; + } + if(val > max) { + return 1.0; + } + return (val - min) / len; + } + + @Override + public double nextRandom() { + return min + random.nextDouble() * len; + } + + /** + * Simple toString explaining the distribution parameters. + * + * Used in describing cluster models. + */ + @Override + public String toString() { + return "Uniform Distribution (min=" + min + ", max=" + max + ")"; + } + + /** + * @return the minimum value + */ + public double getMin() { + return min; + } + + /** + * @return the maximum value + */ + public double getMax() { + return max; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java new file mode 100644 index 00000000..ed9c0e88 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Standard distributions, with random generation functionalities.</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.math.statistics.distribution;
\ No newline at end of file |