summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/math/statistics/distribution
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/statistics/distribution')
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java499
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java496
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java411
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java90
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java14
11 files changed, 1602 insertions, 38 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
new file mode 100644
index 00000000..1efe19dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
@@ -0,0 +1,499 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Beta Distribution with implementation of the regularized incomplete beta
+ * function
+ *
+ * @author Jan Brusis
+ * @author Erich Schubert
+ */
+public class BetaDistribution implements DistributionWithRandom {
+ /**
+ * Numerical precision to use
+ */
+ static final double NUM_PRECISION = 1E-15;
+
+ /**
+ * Limit of when to switch to quadrature method
+ */
+ static final double SWITCH = 3000;
+
+ /**
+ * Abscissas for Gauss-Legendre quadrature
+ */
+ static final double[] GAUSSLEGENDRE_Y = { 0.0021695375159141994, 0.011413521097787704, 0.027972308950302116, 0.051727015600492421, 0.082502225484340941, 0.12007019910960293, 0.16415283300752470, 0.21442376986779355, 0.27051082840644336, 0.33199876341447887, 0.39843234186401943, 0.46931971407375483, 0.54413605556657973, 0.62232745288031077, 0.70331500465597174, 0.78649910768313447, 0.87126389619061517, 0.95698180152629142 };
+
+ /**
+ * Weights for Gauss-Legendre quadrature
+ */
+ static final double[] GAUSSLEGENDRE_W = { 0.0055657196642445571, 0.012915947284065419, 0.020181515297735382, 0.027298621498568734, 0.034213810770299537, 0.040875750923643261, 0.047235083490265582, 0.053244713977759692, 0.058860144245324798, 0.064039797355015485, 0.068745323835736408, 0.072941885005653087, 0.076598410645870640, 0.079687828912071670, 0.082187266704339706, 0.084078218979661945, 0.085346685739338721, 0.085983275670394821 };
+
+ /**
+ * Shape parameter of beta distribution
+ */
+ private final double alpha;
+
+ /**
+ * Shape parameter of beta distribution
+ */
+ private final double beta;
+
+ /**
+ * For random number generation
+ */
+ private Random random;
+
+ /**
+ * Log beta(a, b) cache
+ */
+ private double logbab;
+
+ /**
+ * Constructor.
+ *
+ * @param a shape Parameter a
+ * @param b shape Parameter b
+ */
+ public BetaDistribution(double a, double b) {
+ this(a, b, new Random());
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param a shape Parameter a
+ * @param b shape Parameter b
+ * @param random Random generator
+ */
+ public BetaDistribution(double a, double b, Random random) {
+ super();
+ if(a <= 0.0 || b <= 0.0) {
+ throw new IllegalArgumentException("Invalid parameters for Beta distribution.");
+ }
+
+ this.alpha = a;
+ this.beta = b;
+ this.logbab = logBeta(a, b);
+ this.random = random;
+ }
+
+ @Override
+ public double pdf(double val) {
+ if(val < 0. || val > 1.) {
+ return 0.;
+ }
+ if(val == 0.) {
+ if(alpha > 1.) {
+ return 0.;
+ }
+ if(alpha < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return beta;
+ }
+ if(val == 1.) {
+ if(beta > 1.) {
+ return 0.;
+ }
+ if(beta < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return alpha;
+ }
+ return Math.exp(-logbab + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+ }
+
+ @Override
+ public double cdf(double x) {
+ if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ if(alpha > SWITCH && beta > SWITCH) {
+ return regularizedIncBetaQuadrature(alpha, beta, x);
+ }
+ double bt = Math.exp(-logbab + alpha * Math.log(x) + beta * Math.log1p(-x));
+ if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+ }
+ else {
+ return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+ }
+ }
+
+ @Override
+ public double quantile(double x) {
+ // Valid parameters
+ if(x < 0 || x > 1 || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x == 0) {
+ return 0.0;
+ }
+ if(x == 1) {
+ return 1.0;
+ }
+ // Simpler to compute inverse?
+ if(x > 0.5) {
+ return 1 - rawQuantile(1 - x, beta, alpha, logbab);
+ }
+ else {
+ return rawQuantile(x, alpha, beta, logbab);
+ }
+ }
+
+ @Override
+ public double nextRandom() {
+ double x = GammaDistribution.nextRandom(alpha, 1, random);
+ double y = GammaDistribution.nextRandom(beta, 1, random);
+ return x / (x + y);
+ }
+
+ @Override
+ public String toString() {
+ return "BetaDistribution(alpha=" + alpha + ", beta=" + beta + ")";
+ }
+
+ /**
+ * Static version of the CDF of the beta distribution
+ *
+ * @param val Value
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return cumulative density
+ */
+ public static double cdf(double val, double alpha, double beta) {
+ return regularizedIncBeta(val, alpha, beta);
+ }
+
+ /**
+ * Static version of the PDF of the beta distribution
+ *
+ * @param val Value
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return probability density
+ */
+ public static double pdf(double val, double alpha, double beta) {
+ if(alpha <= 0. || beta <= 0. || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(val)) {
+ return Double.NaN;
+ }
+ if(val < 0. || val > 1.) {
+ return 0.;
+ }
+ if(val == 0.) {
+ if(alpha > 1.) {
+ return 0.;
+ }
+ if(alpha < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return beta;
+ }
+ if(val == 1.) {
+ if(beta > 1.) {
+ return 0.;
+ }
+ if(beta < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return alpha;
+ }
+ return Math.exp(-logBeta(alpha, beta) + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+ }
+
+ /**
+ * Compute log beta(a,b)
+ *
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return Logarithm of result
+ */
+ public static double logBeta(double alpha, double beta) {
+ return GammaDistribution.logGamma(alpha) + GammaDistribution.logGamma(beta) - GammaDistribution.logGamma(alpha + beta);
+ }
+
+ /**
+ * Computes the regularized incomplete beta function I_x(a, b) which is also
+ * the CDF of the beta distribution. Based on the book "Numerical Recipes"
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return Value of the regularized incomplete beta function
+ */
+ public static double regularizedIncBeta(double x, double alpha, double beta) {
+ if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ if(alpha > SWITCH && beta > SWITCH) {
+ return regularizedIncBetaQuadrature(alpha, beta, x);
+ }
+ double bt = Math.exp(-logBeta(alpha, beta) + alpha * Math.log(x) + beta * Math.log1p(-x));
+ if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+ }
+ else {
+ return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+ }
+ }
+
+ /**
+ * Returns the regularized incomplete beta function I_x(a, b) Includes the
+ * continued fraction way of computing, based on the book "Numerical Recipes".
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return result
+ */
+ protected static double regularizedIncBetaCF(double alpha, double beta, double x) {
+ final double FPMIN = Double.MIN_VALUE / NUM_PRECISION;
+ double qab = alpha + beta;
+ double qap = alpha + 1.0;
+ double qam = alpha - 1.0;
+ double c = 1.0;
+ double d = 1.0 - qab * x / qap;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ d = 1.0 / d;
+ double h = d;
+ for(int m = 1; m < 10000; m++) {
+ int m2 = 2 * m;
+ double aa = m * (beta - m) * x / ((qam + m2) * (alpha + m2));
+ d = 1.0 + aa * d;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ c = 1.0 + aa / c;
+ if(Math.abs(c) < FPMIN) {
+ c = FPMIN;
+ }
+ d = 1.0 / d;
+ h *= d * c;
+ aa = -(alpha + m) * (qab + m) * x / ((alpha + m2) * (qap + m2));
+ d = 1.0 + aa * d;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ c = 1.0 + aa / c;
+ if(Math.abs(c) < FPMIN) {
+ c = FPMIN;
+ }
+ d = 1.0 / d;
+ double del = d * c;
+ h *= del;
+ if(Math.abs(del - 1.0) <= NUM_PRECISION) {
+ break;
+ }
+ }
+ return h;
+ }
+
+ /**
+ * Returns the regularized incomplete beta function I_x(a, b) by quadrature,
+ * based on the book "Numerical Recipes".
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return result
+ */
+ protected static double regularizedIncBetaQuadrature(double alpha, double beta, double x) {
+ double a1 = alpha - 1.0;
+ double b1 = beta - 1.0;
+ double mu = alpha / (alpha + beta);
+ double lnmu = Math.log(mu);
+ double lnmuc = Math.log1p(-mu);
+ double t = Math.sqrt(alpha * beta / ((alpha + beta) * (alpha + beta) * (alpha + beta + 1.0)));
+ double xu;
+ if(x > alpha / (alpha + beta)) {
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ xu = Math.min(1.0, Math.max(mu + 10.0 * t, x + 5.0 * t));
+ }
+ else {
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ xu = Math.max(0.0, Math.min(mu - 10.0 * t, x - 5.0 * t));
+ }
+ double sum = 0.0;
+ for(int i = 0; i < GAUSSLEGENDRE_Y.length; i++) {
+ t = x + (xu - x) * GAUSSLEGENDRE_Y[i];
+ sum += GAUSSLEGENDRE_W[i] * Math.exp(a1 * (Math.log(t) - lnmu) + b1 * (Math.log1p(-t) - lnmuc));
+ }
+ double ans = sum * (xu - x) * Math.exp(a1 * lnmu - GammaDistribution.logGamma(alpha) + b1 * lnmuc - GammaDistribution.logGamma(b1) + GammaDistribution.logGamma(alpha + beta));
+ return ans > 0 ? 1.0 - ans : -ans;
+ }
+
+ /**
+ * Compute quantile (inverse cdf) for Beta distributions.
+ *
+ * @param p Probability
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return Probit for Beta distribution
+ */
+ public static double quantile(double p, double alpha, double beta) {
+ // Valid parameters
+ if(Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(p) || alpha < 0. || beta < 0.) {
+ return Double.NaN;
+ }
+ if(p < 0 || p > 1) {
+ return Double.NaN;
+ }
+ if(p == 0) {
+ return 0.0;
+ }
+ if(p == 1) {
+ return 1.0;
+ }
+ // Simpler to compute inverse?
+ if(p > 0.5) {
+ return 1 - rawQuantile(1 - p, beta, alpha, logBeta(beta, alpha));
+ }
+ else {
+ return rawQuantile(p, alpha, beta, logBeta(alpha, beta));
+ }
+ }
+
+ /**
+ * Raw quantile function
+ *
+ * @param p P, must be 0 < p <= .5
+ * @param alpha Alpha
+ * @param beta Beta
+ * @param logbeta log Beta(alpha, beta)
+ * @return Position
+ */
+ protected static double rawQuantile(double p, double alpha, double beta, final double logbeta) {
+ // Initial estimate for x
+ double x;
+ {
+ // Very fast approximation of y.
+ double tmp = Math.sqrt(-2 * Math.log(p));
+ double y = tmp - (2.30753 + 0.27061 * tmp) / (1. + (0.99229 + 0.04481 * tmp) * tmp);
+
+ if(alpha > 1 && beta > 1) {
+ double r = (y * y - 3.) / 6.;
+ double s = 1. / (alpha + alpha - 1.);
+ double t = 1. / (beta + beta - 1.);
+ double h = 2. / (s + t);
+ double w = y * Math.sqrt(h + r) / h - (t - s) * (r + 5. / 6. - 2. / (3. * h));
+ x = alpha / (alpha + beta * Math.exp(w + w));
+ }
+ else {
+ double r = beta + beta;
+ double t = 1. / (9. * beta);
+ t = r * Math.pow(1. - t + y * Math.sqrt(t), 3.0);
+ if(t <= 0.) {
+ x = 1. - Math.exp((Math.log1p(-p) + Math.log(beta) + logbeta) / beta);
+ }
+ else {
+ t = (4. * alpha + r - 2.) / t;
+ if(t <= 1.) {
+ x = Math.exp((Math.log(p * alpha) + logbeta) / alpha);
+ }
+ else {
+ x = 1. - 2. / (t + 1.);
+ }
+ }
+ }
+ // Degenerate initial approximations
+ if(x < 3e-308 || x > 1 - 2.22e-16) {
+ x = 0.5;
+ }
+ }
+
+ // Newon-Raphson method using the CDF
+ {
+ final double ialpha = 1 - alpha;
+ final double ibeta = 1 - beta;
+
+ // Desired accuracy, as from GNU R adoption of AS 109
+ final double acu = Math.max(1e-300, Math.pow(10., -13 - 2.5 / (alpha * alpha) - .5 / (p * p)));
+ double prevstep = 0., y = 0., stepsize = 1;
+
+ for(int outer = 0; outer < 1000; outer++) {
+ // Current CDF value
+ double ynew = cdf(x, alpha, beta);
+ if(Double.isInfinite(ynew)) { // Degenerated.
+ return Double.NaN;
+ }
+ // Error gradient
+ ynew = (ynew - p) * Math.exp(logbeta + ialpha * Math.log(x) + ibeta * Math.log1p(-x));
+ if(ynew * y <= 0.) {
+ prevstep = Math.max(Math.abs(stepsize), 3e-308);
+ }
+ // Inner loop: try different step sizes: y * 3^-i
+ double g = 1, xnew = 0.;
+ for(int inner = 0; inner < 1000; inner++) {
+ stepsize = g * ynew;
+ if(Math.abs(stepsize) < prevstep) {
+ xnew = x - stepsize; // Candidate x
+ if(xnew >= 0. && xnew <= 1.) {
+ // Close enough
+ if(prevstep <= acu || Math.abs(ynew) <= acu) {
+ return x;
+ }
+ if(xnew != 0. && xnew != 1.) {
+ break;
+ }
+ }
+ }
+ g /= 3.;
+ }
+ // Convergence
+ if(Math.abs(xnew - x) < 1e-15 * x) {
+ return x;
+ }
+ // Iterate with new values
+ x = xnew;
+ y = ynew;
+ }
+ }
+ // Not converged in Newton-Raphson
+ throw new AbortException("Beta quantile computation did not converge.");
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
index c561c4cd..84c86e98 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
@@ -1,5 +1,7 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -27,8 +29,10 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
* Chi distribution.
*
* @author Erich Schubert
+ *
+ * @apiviz.composedOf ChiSquaredDistribution
*/
-public class ChiDistribution implements Distribution {
+public class ChiDistribution implements DistributionWithRandom {
/**
* Degrees of freedom. Usually integer.
*/
@@ -89,4 +93,15 @@ public class ChiDistribution implements Distribution {
public static double cdf(double val, double dof) {
return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2);
}
+
+ // FIXME: implement!
+ @Override
+ public double quantile(double val) {
+ throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ @Override
+ public String toString() {
+ return "ChiDistribution(dof=" + dof + ")";
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
index 0ab39c78..8555afd3 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
@@ -1,5 +1,7 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -35,7 +37,7 @@ public class ChiSquaredDistribution extends GammaDistribution {
* @param dof Degrees of freedom.
*/
public ChiSquaredDistribution(double dof) {
- super(.5 * dof, 2.0);
+ super(.5 * dof, .5);
}
/**
@@ -69,4 +71,23 @@ public class ChiSquaredDistribution extends GammaDistribution {
}
return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0;
}
+
+ /**
+ * Return the quantile function for this distribution
+ *
+ * Reference:
+ * <p>
+ * Algorithm AS 91: The percentage points of the $\chi$^2 distribution<br />
+ * D.J. Best, D. E. Roberts<br />
+ * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+ * </p>
+ *
+ * @param x Quantile
+ * @param dof Degrees of freedom
+ * @return quantile position
+ */
+ @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+ public static double quantile(double x, double dof) {
+ return GammaDistribution.quantile(x, .5 * dof, .5);
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
index 1f36dd4a..b046f0ef 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
@@ -28,7 +28,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*
* @author Erich Schubert
*/
-public class ConstantDistribution implements Distribution {
+public class ConstantDistribution implements DistributionWithRandom {
/**
* The constant
*/
@@ -58,4 +58,9 @@ public class ConstantDistribution implements Distribution {
public double cdf(double val) {
return (val >= c) ? 1.0 : 0.0;
}
+
+ @Override
+ public double quantile(double val) {
+ return c;
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
index 290e6434..5b6cd286 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
@@ -24,20 +24,14 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*/
/**
- * Interface for a simple distribution generator with a PDF, i.e. it can also
- * compute a density
+ * Statistical distributions, with their common functions. See
+ * {@link DistributionWithRandom} for distributions that also have a random
+ * generator included.
*
* @author Erich Schubert
*/
public interface Distribution {
/**
- * Generate a new random value
- *
- * @return new random value
- */
- public double nextRandom();
-
- /**
* Return the density of an existing value
*
* @param val existing value
@@ -54,6 +48,14 @@ public interface Distribution {
public double cdf(double val);
/**
+ * Quantile aka probit (for normal) aka inverse CDF (invcdf, cdf^-1) function.
+ *
+ * @param val Quantile to find
+ * @return Quantile position
+ */
+ public double quantile(double val);
+
+ /**
* Describe the distribution
*
* @return description
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
new file mode 100644
index 00000000..af272528
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
@@ -0,0 +1,37 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Distribution that also has support for generating random numbers.
+ *
+ * @author Erich Schubert
+ */
+public interface DistributionWithRandom extends Distribution {
+ /**
+ * Generate a new random value
+ *
+ * @return new random value
+ */
+ public double nextRandom();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
index 6830f25a..21eebc51 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
@@ -25,14 +25,21 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
* Gamma Distribution, with random generation and density functions.
*
* @author Erich Schubert
*/
-public class GammaDistribution implements Distribution {
+public class GammaDistribution implements DistributionWithRandom {
+ /**
+ * Euler–Mascheroni constant
+ */
+ public static final double EULERS_CONST = 0.5772156649015328606065120900824024;
+
/**
* LANCZOS-Coefficients for Gamma approximation.
*
@@ -102,6 +109,11 @@ public class GammaDistribution implements Distribution {
}
@Override
+ public double quantile(double val) {
+ return quantile(val, k, theta);
+ }
+
+ @Override
public double nextRandom() {
return nextRandom(k, theta, random);
}
@@ -113,7 +125,7 @@ public class GammaDistribution implements Distribution {
*/
@Override
public String toString() {
- return "Gamma Distribution (k=" + k + ", theta=" + theta + ")";
+ return "GammaDistribution(k=" + k + ", theta=" + theta + ")";
}
/**
@@ -139,7 +151,19 @@ public class GammaDistribution implements Distribution {
* @return cdf value
*/
public static double cdf(double val, double k, double theta) {
- return regularizedGammaP(k, val / theta);
+ return regularizedGammaP(k, val * theta);
+ }
+
+ /**
+ * The log CDF, static version.
+ *
+ * @param val Value
+ * @param k Shape k
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @return cdf value
+ */
+ public static double logcdf(double val, double k, double theta) {
+ return logregularizedGammaP(k, val * theta);
}
/**
@@ -170,6 +194,33 @@ public class GammaDistribution implements Distribution {
}
/**
+ * Gamma distribution PDF (with 0.0 for x &lt; 0)
+ *
+ * @param x query value
+ * @param k Alpha
+ * @param theta Theta = 1 / Beta
+ * @return probability density
+ */
+ public static double logpdf(double x, double k, double theta) {
+ if(x < 0) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x == 0) {
+ if(k == 1.0) {
+ return Math.log(theta);
+ }
+ else {
+ return Double.NEGATIVE_INFINITY;
+ }
+ }
+ if(k == 1.0) {
+ return Math.log(theta) - x * theta;
+ }
+
+ return Math.log(theta) + (k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k);
+ }
+
+ /**
* Compute logGamma.
*
* Based loosely on "Numerical Recpies" and the work of Paul Godfrey at
@@ -195,6 +246,22 @@ public class GammaDistribution implements Distribution {
}
/**
+ * Compute the regular Gamma function.
+ *
+ * Note: for numerical reasons, it is preferrable to use {@link #logGamma}
+ * when possible! In particular, this method just computes
+ * {@code Math.exp(logGamma(x))} anyway.
+ *
+ * Try to postpone the {@code Math.exp} call to preserve numeric range!
+ *
+ * @param x Position
+ * @return Gamma at this position
+ */
+ public static double gamma(double x) {
+ return Math.exp(logGamma(x));
+ }
+
+ /**
* Returns the regularized gamma function P(a, x).
*
* Includes the quadrature way of computing.
@@ -236,6 +303,49 @@ public class GammaDistribution implements Distribution {
}
/**
+ * Returns the regularized gamma function log P(a, x).
+ *
+ * Includes the quadrature way of computing.
+ *
+ * TODO: find "the" most accurate version of this. We seem to agree with
+ * others for the first 10+ digits, but diverge a bit later than that.
+ *
+ * @param a Parameter a
+ * @param x Parameter x
+ * @return Gamma value
+ */
+ public static double logregularizedGammaP(final double a, final double x) {
+ // Special cases
+ if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
+ return Double.NaN;
+ }
+ if(x == 0.0) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x >= a + 1) {
+ // Expected to converge faster
+ // FIXME: and in log?
+ return Math.log(1.0 - regularizedGammaQ(a, x));
+ }
+ // Loosely following "Numerical Recipes"
+ double del = 1.0 / a;
+ double sum = del;
+ for(int n = 1; n < Integer.MAX_VALUE; n++) {
+ // compute next element in the series
+ del *= x / (a + n);
+ sum = sum + del;
+ if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) {
+ break;
+ }
+ }
+ if(Double.isInfinite(sum)) {
+ return 0;
+ }
+ // TODO: reread numerical recipes, can we replace log(sum)?
+ return -x + (a * Math.log(x)) - logGamma(a) + Math.log(sum);
+ }
+
+ /**
* Returns the regularized gamma function Q(a, x) = 1 - P(a, x).
*
* Includes the continued fraction way of computing, based loosely on the book
@@ -313,7 +423,7 @@ public class GammaDistribution implements Distribution {
final double e1 = 1.000000000, e2 = 0.499999994, e3 = 0.166666848;
final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826;
final double e7 = 0.000247453;
-
+
if(k < 1.0) { // Base case, for small k
final double b = 1.0 + 0.36788794412 * k; // Step 1
while(true) {
@@ -360,11 +470,11 @@ public class GammaDistribution implements Distribution {
/* v2 = tv2; */
v12 = tv12;
}
-
+
// double b = 0.0, c = 0.0;
// double si = 0.0, q0 = 0.0;
final double b, c, si, q0;
-
+
// Simpler accept cases & parameter computation
{
final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12);
@@ -373,14 +483,14 @@ public class GammaDistribution implements Distribution {
if(t >= 0.0) {
return (gds / theta); // Immediate acceptance
}
-
+
// Random uniform
final double un = random.nextDouble();
// Squeeze acceptance
if(d * un <= t * t * t) {
return (gds / theta);
}
-
+
if(k != -1.0) { // Step 4. Set-up for hat case
final double r = 1.0 / k;
q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r;
@@ -425,7 +535,7 @@ public class GammaDistribution implements Distribution {
}
}
}
-
+
// Double exponential deviate t
while(true) {
double e, u, sign_u, t;
@@ -438,7 +548,7 @@ public class GammaDistribution implements Distribution {
t = b + (e * si) * sign_u;
}
while(t <= -0.71874483771719);
-
+
// New v(t) and q(t)
final double v = t / (s + s);
final double q;
@@ -467,4 +577,370 @@ public class GammaDistribution implements Distribution {
}
}
}
+
+ /**
+ * Approximate probit for chi squared distribution
+ *
+ * Based on first half of algorithm AS 91
+ *
+ * Reference:
+ * <p>
+ * Algorithm AS 91: The percentage points of the $\chi$ 2 distribution<br />
+ * D.J. Best, D. E. Roberts<br />
+ * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+ * </p>
+ *
+ * @param p Probit value
+ * @param nu Shape parameter for Chi, nu = 2 * k
+ * @param g log(nu)
+ * @return Probit for chi squared
+ */
+ @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+ protected static double chisquaredProbitApproximation(final double p, double nu, double g) {
+ final double EPS1 = 1e-2; // Approximation quality
+ // Sanity checks
+ if(Double.isNaN(p) || Double.isNaN(nu)) {
+ return Double.NaN;
+ }
+ // Range check
+ if(p <= 0) {
+ return 0;
+ }
+ if(p >= 1) {
+ return Double.POSITIVE_INFINITY;
+ }
+ // Invalid parameters
+ if(nu <= 0) {
+ return Double.NaN;
+ }
+ // Shape of gamma distribution, "XX" in AS 91
+ final double k = 0.5 * nu;
+
+ // For small chi squared values - AS 91
+ final double logp = Math.log(p);
+ if(nu < -1.24 * logp) {
+ // FIXME: implement and use logGammap1 instead - more stable?
+ //
+ // final double lgam1pa = (alpha < 0.5) ? logGammap1(alpha) :
+ // (Math.log(alpha) + g);
+ // return Math.exp((lgam1pa + logp) / alpha + MathUtil.LOG2);
+ // This is literal AS 91, above is the GNU R variant.
+ return Math.pow(p * k * Math.exp(g + k * MathUtil.LOG2), 1 / k);
+ }
+ else if(nu > 0.32) {
+ // Wilson and Hilferty estimate: - AS 91 at 3
+ final double x = NormalDistribution.quantile(p, 0, 1);
+ final double p1 = 2. / (9. * nu);
+ double ch = nu * Math.pow(x * Math.sqrt(p1) + 1 - p1, 3);
+
+ // Better approximation for p tending to 1:
+ if(ch > 2.2 * nu + 6) {
+ ch = -2 * (Math.log1p(-p) - (k - 1) * Math.log(0.5 * ch) + g);
+ }
+ return ch;
+ }
+ else {
+ // nu <= 0.32, AS 91 at 1
+ final double C7 = 4.67, C8 = 6.66, C9 = 6.73, C10 = 13.32;
+ final double ag = Math.log1p(-p) + g + (k - 1) * MathUtil.LOG2;
+ double ch = 0.4;
+ while(true) {
+ final double p1 = 1 + ch * (C7 + ch);
+ final double p2 = ch * (C9 + ch * (C8 + ch));
+ final double t = -0.5 + (C7 + 2 * ch) / p1 - (C9 + ch * (C10 + 3 * ch)) / p2;
+ final double delta = (1 - Math.exp(ag + 0.5 * ch) * p2 / p1) / t;
+ ch -= delta;
+ if(Math.abs(delta) > EPS1 * Math.abs(ch)) {
+ return ch;
+ }
+ }
+ }
+ }
+
+ /**
+ * Compute probit (inverse cdf) for Gamma distributions.
+ *
+ * Based on algorithm AS 91:
+ *
+ * Reference:
+ * <p>
+ * Algorithm AS 91: The percentage points of the $\chi$^2 distribution<br />
+ * D.J. Best, D. E. Roberts<br />
+ * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+ * </p>
+ *
+ * @param p Probability
+ * @param k k, alpha aka. "shape" parameter
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @return Probit for Gamma distribution
+ */
+ @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi$^2 distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+ public static double quantile(double p, double k, double theta) {
+ final double EPS2 = 5e-7; // final precision of AS 91
+ final int MAXIT = 1000;
+
+ // Avoid degenerates
+ if(Double.isNaN(p) || Double.isNaN(k) || Double.isNaN(theta)) {
+ return Double.NaN;
+ }
+ // Range check
+ if(p <= 0) {
+ return 0;
+ }
+ if(p >= 1) {
+ return Double.POSITIVE_INFINITY;
+ }
+ // Shape parameter check
+ if(k < 0 || theta <= 0) {
+ return Double.NaN;
+ }
+ // Corner case - all at 0
+ if(k == 0) {
+ return 0.;
+ }
+
+ int max_newton_iterations = 1;
+ // For small values, ensure some refinement iterations
+ if(k < 1e-10) {
+ max_newton_iterations = 7;
+ }
+
+ final double g = logGamma(k); // == logGamma(v/2)
+
+ // Phase I, an initial rough approximation
+ // First half of AS 91
+ double ch = chisquaredProbitApproximation(p, 2 * k, g);
+ // Second hald of AS 91 follows:
+ // Refine ChiSquared approximation
+ chisq: {
+ if(Double.isInfinite(ch)) {
+ // Cannot refine infinity
+ max_newton_iterations = 0;
+ break chisq;
+ }
+ if(ch < EPS2) {
+ // Do not iterate, but refine with newton method
+ max_newton_iterations = 20;
+ break chisq;
+ }
+ if(p > 1 - 1e-14 || p < 1e-100) {
+ // Not in appropriate value range for AS 91
+ max_newton_iterations = 20;
+ break chisq;
+ }
+
+ // Phase II: Iteration
+ final double c = k - 1;
+ final double ch0 = ch; // backup initial approximation
+ for(int i = 1; i <= MAXIT; i++) {
+ final double q = ch; // previous approximation
+ final double p1 = 0.5 * ch;
+ final double p2 = p - regularizedGammaP(k, p1);
+ if(Double.isInfinite(p2) || ch <= 0) {
+ ch = ch0;
+ max_newton_iterations = 27;
+ break chisq;
+ }
+ { // Taylor series of AS 91: iteration via "goto 4"
+ final double t = p2 * Math.exp(k * MathUtil.LOG2 + g + p1 - c * Math.log(ch));
+ final double b = t / ch;
+ final double a = 0.5 * t - b * c;
+ final double s1 = (210. + a * (140. + a * (105. + a * (84. + a * (70. + 60. * a))))) / 420.;
+ final double s2 = (420. + a * (735. + a * (966. + a * (1141. + 1278 * a)))) / 2520.;
+ final double s3 = (210. + a * (462. + a * (707. + 932. * a))) / 2520.;
+ final double s4 = (252. + a * (672. + 1182. * a) + c * (294. + a * (889. + 1740. * a))) / 5040.;
+ final double s5 = (84. + 2264. * a + c * (1175. + 606. * a)) / 2520.;
+ final double s6 = (120. + c * (346. + 127. * c)) / 5040.;
+ ch += t * (1 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6))))));
+ }
+ if(Math.abs(q - ch) < EPS2 * ch) {
+ break chisq;
+ }
+ // Divergence treatment, from GNU R
+ if(Math.abs(q - ch) > 0.1 * Math.abs(ch)) {
+ ch = ((ch < q) ? 0.9 : 1.1) * q;
+ }
+ }
+ LoggingUtil.warning("No convergence in AS 91 Gamma probit.");
+ // no convergence in MAXIT iterations -- but we add Newton now...
+ }
+ double x = 0.5 * ch / theta;
+ if(max_newton_iterations > 0) {
+ // Refine result using final Newton steps.
+ // TODO: add unit tests that show an improvement! Maybe in logscale only?
+ x = gammaQuantileNewtonRefinement(Math.log(p), k, theta, max_newton_iterations, x);
+ }
+ return x;
+ }
+
+ /**
+ * Refinement of ChiSquared probit using Newton iterations.
+ *
+ * A trick used by GNU R to improve precision.
+ *
+ * @param logpt Target value of log p
+ * @param k Alpha
+ * @param theta Theta = 1 / Beta
+ * @param maxit Maximum number of iterations to do
+ * @param x Initial estimate
+ * @return Refined value
+ */
+ protected static double gammaQuantileNewtonRefinement(final double logpt, final double k, final double theta, final int maxit, double x) {
+ final double EPS_N = 1e-15; // Precision threshold
+ // 0 is not possible, try MIN_NORMAL instead
+ if(x <= 0) {
+ x = Double.MIN_NORMAL;
+ }
+ // Current estimation
+ double logpc = logcdf(x, k, theta);
+ if(x == Double.MIN_NORMAL && logpc > logpt * (1. + 1e-7)) {
+ return 0.;
+ }
+ if(logpc == Double.NEGATIVE_INFINITY) {
+ return 0.;
+ }
+ // Refine by newton iterations
+ for(int i = 0; i < maxit; i++) {
+ // Error of current approximation
+ final double logpe = logpc - logpt;
+ if(Math.abs(logpe) < Math.abs(EPS_N * logpt)) {
+ break;
+ }
+ // Step size is controlled by PDF:
+ final double g = logpdf(x, k, theta);
+ if(g == Double.NEGATIVE_INFINITY) {
+ break;
+ }
+ final double newx = x - logpe * Math.exp(logpc - g);
+ // New estimate:
+ logpc = logcdf(newx, k, theta);
+ if(Math.abs(logpc - logpt) > Math.abs(logpe) || (i > 0 && Math.abs(logpc - logpt) == Math.abs(logpe))) {
+ // no further improvement
+ break;
+ }
+ x = newx;
+ }
+ return x;
+ }
+
+ /**
+ * Compute the Psi / Digamma function
+ *
+ * Reference:
+ * <p>
+ * J. M. Bernando<br />
+ * Algorithm AS 103: Psi (Digamma) Function<br />
+ * Statistical Algorithms
+ * </p>
+ *
+ * TODO: is there a more accurate version maybe in R?
+ *
+ * @param x Position
+ * @return digamma value
+ */
+ @Reference(authors = "J. M. Bernando", title = "Algorithm AS 103: Psi (Digamma) Function", booktitle = "Statistical Algorithms")
+ public static double digamma(double x) {
+ if(!(x > 0)) {
+ return Double.NaN;
+ }
+ // Method of equation 5:
+ if(x <= 1e-5) {
+ return -EULERS_CONST - 1. / x;
+ }
+ // Method of equation 4:
+ else if(x > 49.) {
+ final double ix2 = 1. / (x * x);
+ // Partial series expansion
+ return Math.log(x) - 0.5 / x - ix2 * ((1.0 / 12.) + ix2 * (1.0 / 120. - ix2 / 252.));
+ // + O(x^8) error
+ }
+ else {
+ // Stirling expansion
+ return digamma(x + 1.) - 1. / x;
+ }
+ }
+
+ /**
+ * Compute the Trigamma function. Based on digamma.
+ *
+ * TODO: is there a more accurate version maybe in R?
+ *
+ * @param x Position
+ * @return trigamma value
+ */
+ public static double trigamma(double x) {
+ if(!(x > 0)) {
+ return Double.NaN;
+ }
+ // Method of equation 5:
+ if(x <= 1e-5) {
+ return 1. / (x * x);
+ }
+ // Method of equation 4:
+ else if(x > 49.) {
+ final double ix2 = 1. / (x * x);
+ // Partial series expansion
+ return 1 / x - ix2 / 2. + ix2 / x * (1.0 / 6. - ix2 * (1.0 / 30. + ix2 / 42.));
+ // + O(x^8) error
+ }
+ else {
+ // Stirling expansion
+ return trigamma(x + 1.) - 1. / (x * x);
+ }
+ }
+
+ /**
+ * Mean least squares estimation of Gamma distribution to a set of
+ * observations.
+ *
+ * @param data Data
+ * @return Assumed distribution
+ */
+ public static GammaDistribution estimate(double[] data) {
+ return estimate(data, data.length);
+ }
+
+ /**
+ * Mean least squares estimation of Gamma distribution to a set of
+ * observations.
+ *
+ * Reference:
+ * <p>
+ * Maximum likelihood estimation of the parameters of the gamma distribution
+ * and their bias<br />
+ * S. C. Choi, R. Wette<br />
+ * in: Technometrics
+ * </p>
+ *
+ * @param data Data
+ * @param len Length of array
+ * @return Assumed distribution
+ */
+ @Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892")
+ public static GammaDistribution estimate(double[] data, int len) {
+ double meanx = 0, meanlogx = 0;
+ for(int i = 0; i < len; i++) {
+ final double logx = Math.log(data[i]);
+ final double deltax = data[i] - meanx;
+ final double deltalogx = logx - meanlogx;
+ meanx += deltax / (i + 1.);
+ meanlogx += deltalogx / (i + 1.);
+ }
+ // Initial approximation
+ final double logmeanx = Math.log(meanx);
+ final double diff = logmeanx - meanlogx;
+ double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff);
+
+ // Refine via newton iteration, based on Choi and Wette equation
+ while(true) {
+ double kdelta = (Math.log(k) - digamma(k) - diff) / (1 / k - trigamma(k));
+ if(Math.abs(kdelta) < 1E-8 || Double.isNaN(kdelta)) {
+ break;
+ }
+ k += kdelta;
+ }
+ // Estimate theta:
+ final double theta = k / meanx;
+ return new GammaDistribution(k, theta);
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
index 919cc2e3..9180b59e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
*
* @author Erich Schubert
*/
-public class NormalDistribution implements Distribution {
+public class NormalDistribution implements DistributionWithRandom {
/**
* Coefficients for erf approximation.
*
@@ -148,20 +148,25 @@ public class NormalDistribution implements Distribution {
public double pdf(double val) {
return pdf(val, mean, stddev);
}
-
+
@Override
public double cdf(double val) {
return cdf(val, mean, stddev);
}
@Override
+ public double quantile(double q) {
+ return quantile(q, mean, stddev);
+ }
+
+ @Override
public double nextRandom() {
return mean + random.nextGaussian() * stddev;
}
@Override
public String toString() {
- return "Normal Distribution (mean="+mean+", stddev="+stddev+")";
+ return "NormalDistribution(mean=" + mean + ", stddev=" + stddev + ")";
}
/**
@@ -195,7 +200,7 @@ public class NormalDistribution implements Distribution {
if(Double.isInfinite(x)) {
return (x < 0.0) ? 2 : 0;
}
-
+
double result = Double.NaN;
double absx = Math.abs(x);
// First approximation interval
@@ -249,7 +254,7 @@ public class NormalDistribution implements Distribution {
* @return erfinv(x)
*/
public static double erfinv(double x) {
- return standardNormalProbit(0.5 * (x + 1)) / MathUtil.SQRT2;
+ return standardNormalQuantile(0.5 * (x + 1)) / MathUtil.SQRT2;
}
/**
@@ -261,10 +266,13 @@ public class NormalDistribution implements Distribution {
* by Peter John Acklam
* </p>
*
+ * FIXME: precision of this seems to be rather low, compared to our other
+ * functions. Only about 8-9 digits agree with SciPy/GNU R.
+ *
* @param d Quantile. Must be in [0:1], obviously.
* @return Inverse erf.
*/
- public static double standardNormalProbit(double d) {
+ public static double standardNormalQuantile(double d) {
if(d == 0) {
return Double.NEGATIVE_INFINITY;
}
@@ -319,7 +327,7 @@ public class NormalDistribution implements Distribution {
* @return The CDF of the normal given distribution at x.
*/
public static double cdf(double x, double mu, double sigma) {
- return (1 + erf(x / Math.sqrt(2))) / 2;
+ return .5 * (1 + erf((x - mu) / (MathUtil.SQRT2 * sigma)));
}
/**
@@ -331,7 +339,7 @@ public class NormalDistribution implements Distribution {
* @param sigma Standard deviation.
* @return The probit of the normal given distribution at x.
*/
- public static double probit(double x, double mu, double sigma) {
- return mu + sigma * standardNormalProbit(x);
+ public static double quantile(double x, double mu, double sigma) {
+ return mu + sigma * standardNormalQuantile(x);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
new file mode 100644
index 00000000..53fb0dc8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
@@ -0,0 +1,411 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * INCOMPLETE implementation of the poisson distribution.
+ *
+ * TODO: continue implementing, CDF, invcdf and nextRandom are missing
+ *
+ * References:
+ * <p>
+ * Catherine Loader<br />
+ * Fast and Accurate Computation of Binomial Probabilities.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+public class PoissonDistribution implements Distribution {
+ /**
+ * Number of tries
+ */
+ private int n;
+
+ /**
+ * Success probability
+ */
+ private double p;
+
+ /** Stirling error constants: 1./12 */
+ private final static double S0 = 0.08333333333333333333333d;
+
+ /** Stirling error constants: 1./360 */
+ private final static double S1 = 0.0027777777777777777777777777778d;
+
+ /** Stirling error constants: 1./1260 */
+ private final static double S2 = 0.00079365079365079365079365d;
+
+ /** Stirling error constants: 1./1680 */
+ private final static double S3 = 0.000595238095238095238095238095d;
+
+ /** Stirling error constants: 1./1188 */
+ private final static double S4 = 0.00084175084175084175084175084175d;
+
+ /**
+ * Exact table values for n <= 15 in steps of 0.5
+ *
+ * sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) )
+ */
+ private final static double STIRLING_EXACT_ERROR[] = {//
+ 0.0, // 0.0
+ 0.1534264097200273452913848, // 0.5
+ 0.0810614667953272582196702, // 1.0
+ 0.0548141210519176538961390, // 1.5
+ 0.0413406959554092940938221, // 2.0
+ 0.03316287351993628748511048, // 2.5
+ 0.02767792568499833914878929, // 3.0
+ 0.02374616365629749597132920, // 3.5
+ 0.02079067210376509311152277, // 4.0
+ 0.01848845053267318523077934, // 4.5
+ 0.01664469118982119216319487, // 5.0
+ 0.01513497322191737887351255, // 5.5
+ 0.01387612882307074799874573, // 6.0
+ 0.01281046524292022692424986, // 6.5
+ 0.01189670994589177009505572, // 7.0
+ 0.01110455975820691732662991, // 7.5
+ 0.010411265261972096497478567, // 8.0
+ 0.009799416126158803298389475, // 8.5
+ 0.009255462182712732917728637, // 9.0
+ 0.008768700134139385462952823, // 9.5
+ 0.008330563433362871256469318, // 10.0
+ 0.007934114564314020547248100, // 10.5
+ 0.007573675487951840794972024, // 11.0
+ 0.007244554301320383179543912, // 11.5
+ 0.006942840107209529865664152, // 12.0
+ 0.006665247032707682442354394, // 12.5
+ 0.006408994188004207068439631, // 13.0
+ 0.006171712263039457647532867, // 13.5
+ 0.005951370112758847735624416, // 14.0
+ 0.005746216513010115682023589, // 14.5
+ 0.0055547335519628013710386900 // 15.0
+ };
+
+ /**
+ * Constructor.
+ *
+ * Private: API not yet completely implemented!
+ *
+ * @param n Number of tries
+ * @param p Success probability
+ */
+ public PoissonDistribution(int n, double p) {
+ super();
+ this.n = n;
+ this.p = p;
+ }
+
+ /**
+ * Poisson PMF for integer values.
+ *
+ * @param x integer values
+ * @return Probability
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ public double pmf(int x) {
+ // Invalid values
+ if(x < 0 || x > n) {
+ return 0.0;
+ }
+ // Extreme probabilities
+ if(p <= 0d) {
+ return x == 0 ? 1.0 : 0.0;
+ }
+ if(p >= 1d) {
+ return x == n ? 1.0 : 0.0;
+ }
+ // Extreme values of x
+ if(x == 0) {
+ if(p < 0.1) {
+ return Math.exp(-devianceTerm(n, n * (1.0 - p)) - n * p);
+ }
+ else {
+ return Math.exp(n * Math.log(1.0 - p));
+ }
+ }
+ if(x == n) {
+ if(p > 0.9) {
+ return Math.exp(-devianceTerm(n, n * p) - n * (1 - p));
+ }
+ else {
+ return Math.exp(n * Math.log(p));
+ }
+ }
+
+ final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * (1.0 - p));
+ final double f = (MathUtil.TWOPI * x * (n - x)) / n;
+ return Math.exp(lc) / Math.sqrt(f);
+ }
+
+ @Override
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ public double pdf(double x) {
+ // Invalid values
+ if(x < 0 || x > n) {
+ return 0.0;
+ }
+ // Extreme probabilities
+ if(p <= 0d) {
+ return x == 0 ? 1.0 : 0.0;
+ }
+ if(p >= 1d) {
+ return x == n ? 1.0 : 0.0;
+ }
+ final double q = 1 - p;
+ // FIXME: check for x to be integer, return 0 otherwise?
+
+ // Extreme values of x
+ if(x == 0) {
+ if(p < 0.1) {
+ return Math.exp(-devianceTerm(n, n * q) - n * p);
+ }
+ else {
+ return Math.exp(n * Math.log(q));
+ }
+ }
+ if(x == n) {
+ if(p > 0.9) {
+ return Math.exp(-devianceTerm(n, n * p) - n * q);
+ }
+ else {
+ return Math.exp(n * Math.log(p));
+ }
+ }
+ final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * q);
+ final double f = (MathUtil.TWOPI * x * (n - x)) / n;
+ return Math.exp(lc) / Math.sqrt(f);
+ }
+
+ // FIXME: implement!
+ @Override
+ public double cdf(double val) {
+ throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ // FIXME: implement!
+ @Override
+ public double quantile(double val) {
+ throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ /**
+ * Compute the poisson distribution PDF with an offset of + 1
+ *
+ * pdf(x_plus_1 - 1, lambda)
+ *
+ * @param x_plus_1 x+1
+ * @param lambda Lambda
+ * @return pdf
+ */
+ public static double poissonPDFm1(double x_plus_1, double lambda) {
+ if(Double.isInfinite(lambda)) {
+ return 0.;
+ }
+ if(x_plus_1 > 1) {
+ return rawProbability(x_plus_1 - 1, lambda);
+ }
+ if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+ return Math.exp(-lambda - GammaDistribution.logGamma(x_plus_1));
+ }
+ else {
+ return rawProbability(x_plus_1, lambda) * (x_plus_1 / lambda);
+ }
+ }
+
+ /**
+ * Compute the poisson distribution PDF with an offset of + 1
+ *
+ * log pdf(x_plus_1 - 1, lambda)
+ *
+ * @param x_plus_1 x+1
+ * @param lambda Lambda
+ * @return pdf
+ */
+ public static double logpoissonPDFm1(double x_plus_1, double lambda) {
+ if(Double.isInfinite(lambda)) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x_plus_1 > 1) {
+ return rawLogProbability(x_plus_1 - 1, lambda);
+ }
+ if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+ return -lambda - GammaDistribution.logGamma(x_plus_1);
+ }
+ else {
+ return rawLogProbability(x_plus_1, lambda) + Math.log(x_plus_1 / lambda);
+ }
+ }
+
+ /**
+ * Calculates the Striling Error
+ *
+ * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
+ *
+ * @param n Parameter n
+ * @return Stirling error
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ private static double stirlingError(int n) {
+ // Try to use a table value:
+ if(n < 16) {
+ return STIRLING_EXACT_ERROR[n * 2];
+ }
+ final double nn = n * n;
+ // Use the appropriate number of terms
+ if(n > 500) {
+ return (S0 - S1 / nn) / n;
+ }
+ if(n > 80) {
+ return ((S0 - (S1 - S2 / nn)) / nn) / n;
+ }
+ if(n > 35) {
+ return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
+ }
+ return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
+ }
+
+ /**
+ * Calculates the Striling Error
+ *
+ * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
+ *
+ * @param n Parameter n
+ * @return Stirling error
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ private static double stirlingError(double n) {
+ if(n < 16.0) {
+ // Our table has a step size of 0.5
+ final double n2 = 2.0 * n;
+ if(Math.floor(n2) == n2) { // Exact match
+ return STIRLING_EXACT_ERROR[(int) n2];
+ }
+ else {
+ return GammaDistribution.logGamma(n + 1.0) - (n + 0.5) * Math.log(n) + n - MathUtil.LOGSQRTTWOPI;
+ }
+ }
+ final double nn = n * n;
+ if(n > 500.0) {
+ return (S0 - S1 / nn) / n;
+ }
+ if(n > 80.0) {
+ return ((S0 - (S1 - S2 / nn)) / nn) / n;
+ }
+ if(n > 35.0) {
+ return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
+ }
+ return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
+ }
+
+ /**
+ * Evaluate the deviance term of the saddle point approximation.
+ *
+ * bd0(x,np) = x*ln(x/np)+np-x
+ *
+ * @param x probability density function position
+ * @param np product of trials and success probability: n*p
+ * @return Deviance term
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ private static double devianceTerm(double x, double np) {
+ if(Math.abs(x - np) < 0.1 * (x + np)) {
+ final double v = (x - np) / (x + np);
+
+ double s = (x - np) * v;
+ double ej = 2.0d * x * v;
+ for(int j = 1;; j++) {
+ ej *= v * v;
+ final double s1 = s + ej / (2 * j + 1);
+ if(s1 == s) {
+ return s1;
+ }
+ s = s1;
+ }
+ }
+ return x * Math.log(x / np) + np - x;
+ }
+
+ /**
+ * Poisson distribution probability, but also for non-integer arguments.
+ *
+ * lb^x exp(-lb) / x!
+ *
+ * @param x X
+ * @param lambda lambda
+ * @return Poisson distribution probability
+ */
+ public static double rawProbability(double x, double lambda) {
+ // Extreme lambda
+ if(lambda == 0) {
+ return ((x == 0) ? 1. : 0.);
+ }
+ // Extreme values
+ if(Double.isInfinite(lambda) || x < 0) {
+ return 0.;
+ }
+ if(x <= lambda * Double.MIN_NORMAL) {
+ return Math.exp(-lambda);
+ }
+ if(lambda < x * Double.MIN_NORMAL) {
+ double r = -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
+ return Math.exp(r);
+ }
+ final double f = MathUtil.TWOPI * x;
+ final double y = -stirlingError(x) - devianceTerm(x, lambda);
+ return Math.exp(y) / Math.sqrt(f);
+ }
+
+ /**
+ * Poisson distribution probability, but also for non-integer arguments.
+ *
+ * lb^x exp(-lb) / x!
+ *
+ * @param x X
+ * @param lambda lambda
+ * @return Poisson distribution probability
+ */
+ public static double rawLogProbability(double x, double lambda) {
+ // Extreme lambda
+ if(lambda == 0) {
+ return ((x == 0) ? 1. : Double.NEGATIVE_INFINITY);
+ }
+ // Extreme values
+ if(Double.isInfinite(lambda) || x < 0) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x <= lambda * Double.MIN_NORMAL) {
+ return -lambda;
+ }
+ if(lambda < x * Double.MIN_NORMAL) {
+ return -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
+ }
+ final double f = MathUtil.TWOPI * x;
+ final double y = -stirlingError(x) - devianceTerm(x, lambda);
+ return -0.5 * Math.log(f) + y;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
new file mode 100644
index 00000000..2e9e0d15
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
@@ -0,0 +1,90 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/**
+ * Student's t distribution.
+ *
+ * FIXME: add quantile function!
+ *
+ * @author Jan Brusis
+ */
+public class StudentsTDistribution implements Distribution {
+ /**
+ * Degrees of freedom
+ */
+ private final int v;
+
+ /**
+ * Constructor.
+ *
+ * @param v Degrees of freedom
+ */
+ public StudentsTDistribution(int v) {
+ this.v = v;
+ }
+
+ @Override
+ public double pdf(double val) {
+ return pdf(val, v);
+ }
+
+ @Override
+ public double cdf(double val) {
+ return cdf(val, v);
+ }
+
+ // FIXME: implement!
+ @Override
+ public double quantile(double val) {
+ throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ /**
+ * Static version of the t distribution's PDF.
+ *
+ * @param val value to evaluate
+ * @param v degrees of freedom
+ * @return f(val,v)
+ */
+ public static double pdf(double val, int v) {
+ // TODO: improve precision by computing "exp" last?
+ return Math.exp(GammaDistribution.logGamma((v + 1) / 2) - GammaDistribution.logGamma(v / 2)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) / 2));
+ }
+
+ /**
+ * Static version of the CDF of the t-distribution for t > 0
+ *
+ * @param val value to evaluate
+ * @param v degrees of freedom
+ * @return F(val, v)
+ */
+ public static double cdf(double val, int v) {
+ double x = v / (val * val + v);
+ return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v / 2, 0.5));
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
index 9571cfd3..4f54fbf9 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
@@ -30,7 +30,7 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class UniformDistribution implements Distribution {
+public class UniformDistribution implements DistributionWithRandom {
/**
* Minimum
*/
@@ -100,20 +100,20 @@ public class UniformDistribution implements Distribution {
}
return (val - min) / len;
}
+
+ @Override
+ public double quantile(double val) {
+ return min + len * val;
+ }
@Override
public double nextRandom() {
return min + random.nextDouble() * len;
}
- /**
- * Simple toString explaining the distribution parameters.
- *
- * Used in describing cluster models.
- */
@Override
public String toString() {
- return "Uniform Distribution (min=" + min + ", max=" + max + ")";
+ return "UniformDistribution(min=" + min + ", max=" + max + ")";
}
/**