15 files changed, 1918 insertions, 38 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
new file mode 100644
index 00000000..1efe19dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
@@ -0,0 +1,499 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Beta Distribution with implementation of the regularized incomplete beta
+ * function
+ * 
+ * @author Jan Brusis
+ * @author Erich Schubert
+ */
+public class BetaDistribution implements DistributionWithRandom {
+  /**
+   * Numerical precision to use
+   */
+  static final double NUM_PRECISION = 1E-15;
+
+  /**
+   * Limit of when to switch to quadrature method
+   */
+  static final double SWITCH = 3000;
+
+  /**
+   * Abscissas for Gauss-Legendre quadrature
+   */
+  static final double[] GAUSSLEGENDRE_Y = { 0.0021695375159141994, 0.011413521097787704, 0.027972308950302116, 0.051727015600492421, 0.082502225484340941, 0.12007019910960293, 0.16415283300752470, 0.21442376986779355, 0.27051082840644336, 0.33199876341447887, 0.39843234186401943, 0.46931971407375483, 0.54413605556657973, 0.62232745288031077, 0.70331500465597174, 0.78649910768313447, 0.87126389619061517, 0.95698180152629142 };
+
+  /**
+   * Weights for Gauss-Legendre quadrature
+   */
+  static final double[] GAUSSLEGENDRE_W = { 0.0055657196642445571, 0.012915947284065419, 0.020181515297735382, 0.027298621498568734, 0.034213810770299537, 0.040875750923643261, 0.047235083490265582, 0.053244713977759692, 0.058860144245324798, 0.064039797355015485, 0.068745323835736408, 0.072941885005653087, 0.076598410645870640, 0.079687828912071670, 0.082187266704339706, 0.084078218979661945, 0.085346685739338721, 0.085983275670394821 };
+
+  /**
+   * Shape parameter of beta distribution
+   */
+  private final double alpha;
+
+  /**
+   * Shape parameter of beta distribution
+   */
+  private final double beta;
+
+  /**
+   * For random number generation
+   */
+  private Random random;
+
+  /**
+   * Log beta(a, b) cache
+   */
+  private double logbab;
+
+  /**
+   * Constructor.
+   * 
+   * @param a shape Parameter a
+   * @param b shape Parameter b
+   */
+  public BetaDistribution(double a, double b) {
+    this(a, b, new Random());
+  }
+
+  /**
+   * Constructor.
+   * 
+   * @param a shape Parameter a
+   * @param b shape Parameter b
+   * @param random Random generator
+   */
+  public BetaDistribution(double a, double b, Random random) {
+    super();
+    if(a <= 0.0 || b <= 0.0) {
+      throw new IllegalArgumentException("Invalid parameters for Beta distribution.");
+    }
+
+    this.alpha = a;
+    this.beta = b;
+    this.logbab = logBeta(a, b);
+    this.random = random;
+  }
+
+  @Override
+  public double pdf(double val) {
+    if(val < 0. || val > 1.) {
+      return 0.;
+    }
+    if(val == 0.) {
+      if(alpha > 1.) {
+        return 0.;
+      }
+      if(alpha < 1.) {
+        return Double.POSITIVE_INFINITY;
+      }
+      return beta;
+    }
+    if(val == 1.) {
+      if(beta > 1.) {
+        return 0.;
+      }
+      if(beta < 1.) {
+        return Double.POSITIVE_INFINITY;
+      }
+      return alpha;
+    }
+    return Math.exp(-logbab + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+  }
+
+  @Override
+  public double cdf(double x) {
+    if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+      return Double.NaN;
+    }
+    if(x <= 0.0) {
+      return 0.0;
+    }
+    if(x >= 1.0) {
+      return 1.0;
+    }
+    if(alpha > SWITCH && beta > SWITCH) {
+      return regularizedIncBetaQuadrature(alpha, beta, x);
+    }
+    double bt = Math.exp(-logbab + alpha * Math.log(x) + beta * Math.log1p(-x));
+    if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+      return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+    }
+    else {
+      return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+    }
+  }
+
+  @Override
+  public double quantile(double x) {
+    // Valid parameters
+    if(x < 0 || x > 1 || Double.isNaN(x)) {
+      return Double.NaN;
+    }
+    if(x == 0) {
+      return 0.0;
+    }
+    if(x == 1) {
+      return 1.0;
+    }
+    // Simpler to compute inverse?
+    if(x > 0.5) {
+      return 1 - rawQuantile(1 - x, beta, alpha, logbab);
+    }
+    else {
+      return rawQuantile(x, alpha, beta, logbab);
+    }
+  }
+
+  @Override
+  public double nextRandom() {
+    double x = GammaDistribution.nextRandom(alpha, 1, random);
+    double y = GammaDistribution.nextRandom(beta, 1, random);
+    return x / (x + y);
+  }
+
+  @Override
+  public String toString() {
+    return "BetaDistribution(alpha=" + alpha + ", beta=" + beta + ")";
+  }
+
+  /**
+   * Static version of the CDF of the beta distribution
+   * 
+   * @param val Value
+   * @param alpha Shape parameter a
+   * @param beta Shape parameter b
+   * @return cumulative density
+   */
+  public static double cdf(double val, double alpha, double beta) {
+    return regularizedIncBeta(val, alpha, beta);
+  }
+
+  /**
+   * Static version of the PDF of the beta distribution
+   * 
+   * @param val Value
+   * @param alpha Shape parameter a
+   * @param beta Shape parameter b
+   * @return probability density
+   */
+  public static double pdf(double val, double alpha, double beta) {
+    if(alpha <= 0. || beta <= 0. || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(val)) {
+      return Double.NaN;
+    }
+    if(val < 0. || val > 1.) {
+      return 0.;
+    }
+    if(val == 0.) {
+      if(alpha > 1.) {
+        return 0.;
+      }
+      if(alpha < 1.) {
+        return Double.POSITIVE_INFINITY;
+      }
+      return beta;
+    }
+    if(val == 1.) {
+      if(beta > 1.) {
+        return 0.;
+      }
+      if(beta < 1.) {
+        return Double.POSITIVE_INFINITY;
+      }
+      return alpha;
+    }
+    return Math.exp(-logBeta(alpha, beta) + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+  }
+
+  /**
+   * Compute log beta(a,b)
+   * 
+   * @param alpha Shape parameter a
+   * @param beta Shape parameter b
+   * @return Logarithm of result
+   */
+  public static double logBeta(double alpha, double beta) {
+    return GammaDistribution.logGamma(alpha) + GammaDistribution.logGamma(beta) - GammaDistribution.logGamma(alpha + beta);
+  }
+
+  /**
+   * Computes the regularized incomplete beta function I_x(a, b) which is also
+   * the CDF of the beta distribution. Based on the book "Numerical Recipes"
+   * 
+   * @param alpha Parameter a
+   * @param beta Parameter b
+   * @param x Parameter x
+   * @return Value of the regularized incomplete beta function
+   */
+  public static double regularizedIncBeta(double x, double alpha, double beta) {
+    if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+      return Double.NaN;
+    }
+    if(x <= 0.0) {
+      return 0.0;
+    }
+    if(x >= 1.0) {
+      return 1.0;
+    }
+    if(alpha > SWITCH && beta > SWITCH) {
+      return regularizedIncBetaQuadrature(alpha, beta, x);
+    }
+    double bt = Math.exp(-logBeta(alpha, beta) + alpha * Math.log(x) + beta * Math.log1p(-x));
+    if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+      return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+    }
+    else {
+      return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+    }
+  }
+
+  /**
+   * Returns the regularized incomplete beta function I_x(a, b) Includes the
+   * continued fraction way of computing, based on the book "Numerical Recipes".
+   * 
+   * @param alpha Parameter a
+   * @param beta Parameter b
+   * @param x Parameter x
+   * @return result
+   */
+  protected static double regularizedIncBetaCF(double alpha, double beta, double x) {
+    final double FPMIN = Double.MIN_VALUE / NUM_PRECISION;
+    double qab = alpha + beta;
+    double qap = alpha + 1.0;
+    double qam = alpha - 1.0;
+    double c = 1.0;
+    double d = 1.0 - qab * x / qap;
+    if(Math.abs(d) < FPMIN) {
+      d = FPMIN;
+    }
+    d = 1.0 / d;
+    double h = d;
+    for(int m = 1; m < 10000; m++) {
+      int m2 = 2 * m;
+      double aa = m * (beta - m) * x / ((qam + m2) * (alpha + m2));
+      d = 1.0 + aa * d;
+      if(Math.abs(d) < FPMIN) {
+        d = FPMIN;
+      }
+      c = 1.0 + aa / c;
+      if(Math.abs(c) < FPMIN) {
+        c = FPMIN;
+      }
+      d = 1.0 / d;
+      h *= d * c;
+      aa = -(alpha + m) * (qab + m) * x / ((alpha + m2) * (qap + m2));
+      d = 1.0 + aa * d;
+      if(Math.abs(d) < FPMIN) {
+        d = FPMIN;
+      }
+      c = 1.0 + aa / c;
+      if(Math.abs(c) < FPMIN) {
+        c = FPMIN;
+      }
+      d = 1.0 / d;
+      double del = d * c;
+      h *= del;
+      if(Math.abs(del - 1.0) <= NUM_PRECISION) {
+        break;
+      }
+    }
+    return h;
+  }
+
+  /**
+   * Returns the regularized incomplete beta function I_x(a, b) by quadrature,
+   * based on the book "Numerical Recipes".
+   * 
+   * @param alpha Parameter a
+   * @param beta Parameter b
+   * @param x Parameter x
+   * @return result
+   */
+  protected static double regularizedIncBetaQuadrature(double alpha, double beta, double x) {
+    double a1 = alpha - 1.0;
+    double b1 = beta - 1.0;
+    double mu = alpha / (alpha + beta);
+    double lnmu = Math.log(mu);
+    double lnmuc = Math.log1p(-mu);
+    double t = Math.sqrt(alpha * beta / ((alpha + beta) * (alpha + beta) * (alpha + beta + 1.0)));
+    double xu;
+    if(x > alpha / (alpha + beta)) {
+      if(x >= 1.0) {
+        return 1.0;
+      }
+      xu = Math.min(1.0, Math.max(mu + 10.0 * t, x + 5.0 * t));
+    }
+    else {
+      if(x <= 0.0) {
+        return 0.0;
+      }
+      xu = Math.max(0.0, Math.min(mu - 10.0 * t, x - 5.0 * t));
+    }
+    double sum = 0.0;
+    for(int i = 0; i < GAUSSLEGENDRE_Y.length; i++) {
+      t = x + (xu - x) * GAUSSLEGENDRE_Y[i];
+      sum += GAUSSLEGENDRE_W[i] * Math.exp(a1 * (Math.log(t) - lnmu) + b1 * (Math.log1p(-t) - lnmuc));
+    }
+    double ans = sum * (xu - x) * Math.exp(a1 * lnmu - GammaDistribution.logGamma(alpha) + b1 * lnmuc - GammaDistribution.logGamma(b1) + GammaDistribution.logGamma(alpha + beta));
+    return ans > 0 ? 1.0 - ans : -ans;
+  }
+
+  /**
+   * Compute quantile (inverse cdf) for Beta distributions.
+   * 
+   * @param p Probability
+   * @param alpha Shape parameter a
+   * @param beta Shape parameter b
+   * @return Probit for Beta distribution
+   */
+  public static double quantile(double p, double alpha, double beta) {
+    // Valid parameters
+    if(Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(p) || alpha < 0. || beta < 0.) {
+      return Double.NaN;
+    }
+    if(p < 0 || p > 1) {
+      return Double.NaN;
+    }
+    if(p == 0) {
+      return 0.0;
+    }
+    if(p == 1) {
+      return 1.0;
+    }
+    // Simpler to compute inverse?
+    if(p > 0.5) {
+      return 1 - rawQuantile(1 - p, beta, alpha, logBeta(beta, alpha));
+    }
+    else {
+      return rawQuantile(p, alpha, beta, logBeta(alpha, beta));
+    }
+  }
+
+  /**
+   * Raw quantile function
+   * 
+   * @param p P, must be 0 < p <= .5
+   * @param alpha Alpha
+   * @param beta Beta
+   * @param logbeta log Beta(alpha, beta)
+   * @return Position
+   */
+  protected static double rawQuantile(double p, double alpha, double beta, final double logbeta) {
+    // Initial estimate for x
+    double x;
+    {
+      // Very fast approximation of y.
+      double tmp = Math.sqrt(-2 * Math.log(p));
+      double y = tmp - (2.30753 + 0.27061 * tmp) / (1. + (0.99229 + 0.04481 * tmp) * tmp);
+
+      if(alpha > 1 && beta > 1) {
+        double r = (y * y - 3.) / 6.;
+        double s = 1. / (alpha + alpha - 1.);
+        double t = 1. / (beta + beta - 1.);
+        double h = 2. / (s + t);
+        double w = y * Math.sqrt(h + r) / h - (t - s) * (r + 5. / 6. - 2. / (3. * h));
+        x = alpha / (alpha + beta * Math.exp(w + w));
+      }
+      else {
+        double r = beta + beta;
+        double t = 1. / (9. * beta);
+        t = r * Math.pow(1. - t + y * Math.sqrt(t), 3.0);
+        if(t <= 0.) {
+          x = 1. - Math.exp((Math.log1p(-p) + Math.log(beta) + logbeta) / beta);
+        }
+        else {
+          t = (4. * alpha + r - 2.) / t;
+          if(t <= 1.) {
+            x = Math.exp((Math.log(p * alpha) + logbeta) / alpha);
+          }
+          else {
+            x = 1. - 2. / (t + 1.);
+          }
+        }
+      }
+      // Degenerate initial approximations
+      if(x < 3e-308 || x > 1 - 2.22e-16) {
+        x = 0.5;
+      }
+    }
+
+    // Newon-Raphson method using the CDF
+    {
+      final double ialpha = 1 - alpha;
+      final double ibeta = 1 - beta;
+
+      // Desired accuracy, as from GNU R adoption of AS 109
+      final double acu = Math.max(1e-300, Math.pow(10., -13 - 2.5 / (alpha * alpha) - .5 / (p * p)));
+      double prevstep = 0., y = 0., stepsize = 1;
+
+      for(int outer = 0; outer < 1000; outer++) {
+        // Current CDF value
+        double ynew = cdf(x, alpha, beta);
+        if(Double.isInfinite(ynew)) { // Degenerated.
+          return Double.NaN;
+        }
+        // Error gradient
+        ynew = (ynew - p) * Math.exp(logbeta + ialpha * Math.log(x) + ibeta * Math.log1p(-x));
+        if(ynew * y <= 0.) {
+          prevstep = Math.max(Math.abs(stepsize), 3e-308);
+        }
+        // Inner loop: try different step sizes: y * 3^-i
+        double g = 1, xnew = 0.;
+        for(int inner = 0; inner < 1000; inner++) {
+          stepsize = g * ynew;
+          if(Math.abs(stepsize) < prevstep) {
+            xnew = x - stepsize; // Candidate x
+            if(xnew >= 0. && xnew <= 1.) {
+              // Close enough
+              if(prevstep <= acu || Math.abs(ynew) <= acu) {
+                return x;
+              }
+              if(xnew != 0. && xnew != 1.) {
+                break;
+              }
+            }
+          }
+          g /= 3.;
+        }
+        // Convergence
+        if(Math.abs(xnew - x) < 1e-15 * x) {
+          return x;
+        }
+        // Iterate with new values
+        x = xnew;
+        y = ynew;
+      }
+    }
+    // Not converged in Newton-Raphson
+    throw new AbortException("Beta quantile computation did not converge.");
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
index c561c4cd..84c86e98 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
@@ -1,5 +1,7 @@
 package de.lmu.ifi.dbs.elki.math.statistics.distribution;
 
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
 /*
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
@@ -27,8 +29,10 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
  * Chi distribution.
  * 
  * @author Erich Schubert
+ * 
+ * @apiviz.composedOf ChiSquaredDistribution
  */
-public class ChiDistribution implements Distribution {
+public class ChiDistribution implements DistributionWithRandom {
   /**
    * Degrees of freedom. Usually integer.
    */
@@ -89,4 +93,15 @@ public class ChiDistribution implements Distribution {
   public static double cdf(double val, double dof) {
     return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2);
   }
+
+  // FIXME: implement!
+  @Override
+  public double quantile(double val) {
+    throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+  }
+
+  @Override
+  public String toString() {
+    return "ChiDistribution(dof=" + dof + ")";
+  }
 }
 \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
index 0ab39c78..8555afd3 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
@@ -1,5 +1,7 @@
 package de.lmu.ifi.dbs.elki.math.statistics.distribution;
 
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
 /*
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
@@ -35,7 +37,7 @@ public class ChiSquaredDistribution extends GammaDistribution {
    * @param dof Degrees of freedom.
    */
   public ChiSquaredDistribution(double dof) {
-    super(.5 * dof, 2.0);
+    super(.5 * dof, .5);
   }
 
   /**
@@ -69,4 +71,23 @@ public class ChiSquaredDistribution extends GammaDistribution {
     }
     return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0;
   }
+
+  /**
+   * Return the quantile function for this distribution
+   * 
+   * Reference:
+   * <p>
+   * Algorithm AS 91: The percentage points of the $\chi$^2 distribution<br />
+   * D.J. Best, D. E. Roberts<br />
+   * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+   * </p>
+   * 
+   * @param x Quantile
+   * @param dof Degrees of freedom
+   * @return quantile position
+   */
+  @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+  public static double quantile(double x, double dof) {
+    return GammaDistribution.quantile(x, .5 * dof, .5);
+  }
 }
 \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
index 1f36dd4a..b046f0ef 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
@@ -28,7 +28,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
  * 
  * @author Erich Schubert
  */
-public class ConstantDistribution implements Distribution {
+public class ConstantDistribution implements DistributionWithRandom {
   /**
    * The constant
    */
@@ -58,4 +58,9 @@ public class ConstantDistribution implements Distribution {
   public double cdf(double val) {
     return (val >= c) ? 1.0 : 0.0;
   }
+
+  @Override
+  public double quantile(double val) {
+    return c;
+  }
 }
 \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
index 290e6434..5b6cd286 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
@@ -24,20 +24,14 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
  */
 
 /**
- * Interface for a simple distribution generator with a PDF, i.e. it can also
- * compute a density
+ * Statistical distributions, with their common functions. See
+ * {@link DistributionWithRandom} for distributions that also have a random
+ * generator included.
  * 
  * @author Erich Schubert
  */
 public interface Distribution {
   /**
-   * Generate a new random value
-   * 
-   * @return new random value
-   */
-  public double nextRandom();
-
-  /**
    * Return the density of an existing value
    * 
    * @param val existing value
@@ -54,6 +48,14 @@ public interface Distribution {
   public double cdf(double val);
 
   /**
+   * Quantile aka probit (for normal) aka inverse CDF (invcdf, cdf^-1) function.
+   * 
+   * @param val Quantile to find
+   * @return Quantile position
+   */
+  public double quantile(double val);
+
+  /**
    * Describe the distribution
    * 
    * @return description
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
new file mode 100644
index 00000000..af272528
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
@@ -0,0 +1,37 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Distribution that also has support for generating random numbers.
+ * 
+ * @author Erich Schubert
+ */
+public interface DistributionWithRandom extends Distribution {
+  /**
+   * Generate a new random value
+   * 
+   * @return new random value
+   */
+  public double nextRandom();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
index 6830f25a..21eebc51 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
@@ -25,14 +25,21 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
 
 import java.util.Random;
 
+import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
 import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
 
 /**
  * Gamma Distribution, with random generation and density functions.
  * 
  * @author Erich Schubert
  */
-public class GammaDistribution implements Distribution {
+public class GammaDistribution implements DistributionWithRandom {
+  /**
+   * Euler–Mascheroni constant
+   */
+  public static final double EULERS_CONST = 0.5772156649015328606065120900824024;
+
   /**
    * LANCZOS-Coefficients for Gamma approximation.
    * 
@@ -102,6 +109,11 @@ public class GammaDistribution implements Distribution {
   }
 
   @Override
+  public double quantile(double val) {
+    return quantile(val, k, theta);
+  }
+
+  @Override
   public double nextRandom() {
     return nextRandom(k, theta, random);
   }
@@ -113,7 +125,7 @@ public class GammaDistribution implements Distribution {
    */
   @Override
   public String toString() {
-    return "Gamma Distribution (k=" + k + ", theta=" + theta + ")";
+    return "GammaDistribution(k=" + k + ", theta=" + theta + ")";
   }
 
   /**
@@ -139,7 +151,19 @@ public class GammaDistribution implements Distribution {
    * @return cdf value
    */
   public static double cdf(double val, double k, double theta) {
-    return regularizedGammaP(k, val / theta);
+    return regularizedGammaP(k, val * theta);
+  }
+
+  /**
+   * The log CDF, static version.
+   * 
+   * @param val Value
+   * @param k Shape k
+   * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+   * @return cdf value
+   */
+  public static double logcdf(double val, double k, double theta) {
+    return logregularizedGammaP(k, val * theta);
   }
 
   /**
@@ -170,6 +194,33 @@ public class GammaDistribution implements Distribution {
   }
 
   /**
+   * Gamma distribution PDF (with 0.0 for x &lt; 0)
+   * 
+   * @param x query value
+   * @param k Alpha
+   * @param theta Theta = 1 / Beta
+   * @return probability density
+   */
+  public static double logpdf(double x, double k, double theta) {
+    if(x < 0) {
+      return Double.NEGATIVE_INFINITY;
+    }
+    if(x == 0) {
+      if(k == 1.0) {
+        return Math.log(theta);
+      }
+      else {
+        return Double.NEGATIVE_INFINITY;
+      }
+    }
+    if(k == 1.0) {
+      return Math.log(theta) - x * theta;
+    }
+
+    return Math.log(theta) + (k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k);
+  }
+
+  /**
    * Compute logGamma.
    * 
    * Based loosely on "Numerical Recpies" and the work of Paul Godfrey at
@@ -195,6 +246,22 @@ public class GammaDistribution implements Distribution {
   }
 
   /**
+   * Compute the regular Gamma function.
+   * 
+   * Note: for numerical reasons, it is preferrable to use {@link #logGamma}
+   * when possible! In particular, this method just computes
+   * {@code Math.exp(logGamma(x))} anyway.
+   * 
+   * Try to postpone the {@code Math.exp} call to preserve numeric range!
+   * 
+   * @param x Position
+   * @return Gamma at this position
+   */
+  public static double gamma(double x) {
+    return Math.exp(logGamma(x));
+  }
+
+  /**
    * Returns the regularized gamma function P(a, x).
    * 
    * Includes the quadrature way of computing.
@@ -236,6 +303,49 @@ public class GammaDistribution implements Distribution {
   }
 
   /**
+   * Returns the regularized gamma function log P(a, x).
+   * 
+   * Includes the quadrature way of computing.
+   * 
+   * TODO: find "the" most accurate version of this. We seem to agree with
+   * others for the first 10+ digits, but diverge a bit later than that.
+   * 
+   * @param a Parameter a
+   * @param x Parameter x
+   * @return Gamma value
+   */
+  public static double logregularizedGammaP(final double a, final double x) {
+    // Special cases
+    if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
+      return Double.NaN;
+    }
+    if(x == 0.0) {
+      return Double.NEGATIVE_INFINITY;
+    }
+    if(x >= a + 1) {
+      // Expected to converge faster
+      // FIXME: and in log?
+      return Math.log(1.0 - regularizedGammaQ(a, x));
+    }
+    // Loosely following "Numerical Recipes"
+    double del = 1.0 / a;
+    double sum = del;
+    for(int n = 1; n < Integer.MAX_VALUE; n++) {
+      // compute next element in the series
+      del *= x / (a + n);
+      sum = sum + del;
+      if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) {
+        break;
+      }
+    }
+    if(Double.isInfinite(sum)) {
+      return 0;
+    }
+    // TODO: reread numerical recipes, can we replace log(sum)?
+    return -x + (a * Math.log(x)) - logGamma(a) + Math.log(sum);
+  }
+
+  /**
    * Returns the regularized gamma function Q(a, x) = 1 - P(a, x).
    * 
    * Includes the continued fraction way of computing, based loosely on the book
@@ -313,7 +423,7 @@ public class GammaDistribution implements Distribution {
     final double e1 = 1.000000000, e2 = 0.499999994, e3 = 0.166666848;
     final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826;
     final double e7 = 0.000247453;
-  
+
     if(k < 1.0) { // Base case, for small k
       final double b = 1.0 + 0.36788794412 * k; // Step 1
       while(true) {
@@ -360,11 +470,11 @@ public class GammaDistribution implements Distribution {
         /* v2 = tv2; */
         v12 = tv12;
       }
-  
+
       // double b = 0.0, c = 0.0;
       // double si = 0.0, q0 = 0.0;
       final double b, c, si, q0;
-  
+
       // Simpler accept cases & parameter computation
       {
         final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12);
@@ -373,14 +483,14 @@ public class GammaDistribution implements Distribution {
         if(t >= 0.0) {
           return (gds / theta); // Immediate acceptance
         }
-  
+
         // Random uniform
         final double un = random.nextDouble();
         // Squeeze acceptance
         if(d * un <= t * t * t) {
           return (gds / theta);
         }
-  
+
         if(k != -1.0) { // Step 4. Set-up for hat case
           final double r = 1.0 / k;
           q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r;
@@ -425,7 +535,7 @@ public class GammaDistribution implements Distribution {
           }
         }
       }
-  
+
       // Double exponential deviate t
       while(true) {
         double e, u, sign_u, t;
@@ -438,7 +548,7 @@ public class GammaDistribution implements Distribution {
           t = b + (e * si) * sign_u;
         }
         while(t <= -0.71874483771719);
-  
+
         // New v(t) and q(t)
         final double v = t / (s + s);
         final double q;
@@ -467,4 +577,370 @@ public class GammaDistribution implements Distribution {
       }
     }
   }
+
+  /**
+   * Approximate probit for chi squared distribution
+   * 
+   * Based on first half of algorithm AS 91
+   * 
+   * Reference:
+   * <p>
+   * Algorithm AS 91: The percentage points of the $\chi$ 2 distribution<br />
+   * D.J. Best, D. E. Roberts<br />
+   * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+   * </p>
+   * 
+   * @param p Probit value
+   * @param nu Shape parameter for Chi, nu = 2 * k
+   * @param g log(nu)
+   * @return Probit for chi squared
+   */
+  @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+  protected static double chisquaredProbitApproximation(final double p, double nu, double g) {
+    final double EPS1 = 1e-2; // Approximation quality
+    // Sanity checks
+    if(Double.isNaN(p) || Double.isNaN(nu)) {
+      return Double.NaN;
+    }
+    // Range check
+    if(p <= 0) {
+      return 0;
+    }
+    if(p >= 1) {
+      return Double.POSITIVE_INFINITY;
+    }
+    // Invalid parameters
+    if(nu <= 0) {
+      return Double.NaN;
+    }
+    // Shape of gamma distribution, "XX" in AS 91
+    final double k = 0.5 * nu;
+
+    // For small chi squared values - AS 91
+    final double logp = Math.log(p);
+    if(nu < -1.24 * logp) {
+      // FIXME: implement and use logGammap1 instead - more stable?
+      //
+      // final double lgam1pa = (alpha < 0.5) ? logGammap1(alpha) :
+      // (Math.log(alpha) + g);
+      // return Math.exp((lgam1pa + logp) / alpha + MathUtil.LOG2);
+      // This is literal AS 91, above is the GNU R variant.
+      return Math.pow(p * k * Math.exp(g + k * MathUtil.LOG2), 1 / k);
+    }
+    else if(nu > 0.32) {
+      // Wilson and Hilferty estimate: - AS 91 at 3
+      final double x = NormalDistribution.quantile(p, 0, 1);
+      final double p1 = 2. / (9. * nu);
+      double ch = nu * Math.pow(x * Math.sqrt(p1) + 1 - p1, 3);
+
+      // Better approximation for p tending to 1:
+      if(ch > 2.2 * nu + 6) {
+        ch = -2 * (Math.log1p(-p) - (k - 1) * Math.log(0.5 * ch) + g);
+      }
+      return ch;
+    }
+    else {
+      // nu <= 0.32, AS 91 at 1
+      final double C7 = 4.67, C8 = 6.66, C9 = 6.73, C10 = 13.32;
+      final double ag = Math.log1p(-p) + g + (k - 1) * MathUtil.LOG2;
+      double ch = 0.4;
+      while(true) {
+        final double p1 = 1 + ch * (C7 + ch);
+        final double p2 = ch * (C9 + ch * (C8 + ch));
+        final double t = -0.5 + (C7 + 2 * ch) / p1 - (C9 + ch * (C10 + 3 * ch)) / p2;
+        final double delta = (1 - Math.exp(ag + 0.5 * ch) * p2 / p1) / t;
+        ch -= delta;
+        if(Math.abs(delta) > EPS1 * Math.abs(ch)) {
+          return ch;
+        }
+      }
+    }
+  }
+
+  /**
+   * Compute probit (inverse cdf) for Gamma distributions.
+   * 
+   * Based on algorithm AS 91:
+   * 
+   * Reference:
+   * <p>
+   * Algorithm AS 91: The percentage points of the $\chi$^2 distribution<br />
+   * D.J. Best, D. E. Roberts<br />
+   * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+   * </p>
+   * 
+   * @param p Probability
+   * @param k k, alpha aka. "shape" parameter
+   * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+   * @return Probit for Gamma distribution
+   */
+  @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi$^2 distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+  public static double quantile(double p, double k, double theta) {
+    final double EPS2 = 5e-7; // final precision of AS 91
+    final int MAXIT = 1000;
+
+    // Avoid degenerates
+    if(Double.isNaN(p) || Double.isNaN(k) || Double.isNaN(theta)) {
+      return Double.NaN;
+    }
+    // Range check
+    if(p <= 0) {
+      return 0;
+    }
+    if(p >= 1) {
+      return Double.POSITIVE_INFINITY;
+    }
+    // Shape parameter check
+    if(k < 0 || theta <= 0) {
+      return Double.NaN;
+    }
+    // Corner case - all at 0
+    if(k == 0) {
+      return 0.;
+    }
+
+    int max_newton_iterations = 1;
+    // For small values, ensure some refinement iterations
+    if(k < 1e-10) {
+      max_newton_iterations = 7;
+    }
+
+    final double g = logGamma(k); // == logGamma(v/2)
+
+    // Phase I, an initial rough approximation
+    // First half of AS 91
+    double ch = chisquaredProbitApproximation(p, 2 * k, g);
+    // Second hald of AS 91 follows:
+    // Refine ChiSquared approximation
+    chisq: {
+      if(Double.isInfinite(ch)) {
+        // Cannot refine infinity
+        max_newton_iterations = 0;
+        break chisq;
+      }
+      if(ch < EPS2) {
+        // Do not iterate, but refine with newton method
+        max_newton_iterations = 20;
+        break chisq;
+      }
+      if(p > 1 - 1e-14 || p < 1e-100) {
+        // Not in appropriate value range for AS 91
+        max_newton_iterations = 20;
+        break chisq;
+      }
+
+      // Phase II: Iteration
+      final double c = k - 1;
+      final double ch0 = ch; // backup initial approximation
+      for(int i = 1; i <= MAXIT; i++) {
+        final double q = ch; // previous approximation
+        final double p1 = 0.5 * ch;
+        final double p2 = p - regularizedGammaP(k, p1);
+        if(Double.isInfinite(p2) || ch <= 0) {
+          ch = ch0;
+          max_newton_iterations = 27;
+          break chisq;
+        }
+        { // Taylor series of AS 91: iteration via "goto 4"
+          final double t = p2 * Math.exp(k * MathUtil.LOG2 + g + p1 - c * Math.log(ch));
+          final double b = t / ch;
+          final double a = 0.5 * t - b * c;
+          final double s1 = (210. + a * (140. + a * (105. + a * (84. + a * (70. + 60. * a))))) / 420.;
+          final double s2 = (420. + a * (735. + a * (966. + a * (1141. + 1278 * a)))) / 2520.;
+          final double s3 = (210. + a * (462. + a * (707. + 932. * a))) / 2520.;
+          final double s4 = (252. + a * (672. + 1182. * a) + c * (294. + a * (889. + 1740. * a))) / 5040.;
+          final double s5 = (84. + 2264. * a + c * (1175. + 606. * a)) / 2520.;
+          final double s6 = (120. + c * (346. + 127. * c)) / 5040.;
+          ch += t * (1 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6))))));
+        }
+        if(Math.abs(q - ch) < EPS2 * ch) {
+          break chisq;
+        }
+        // Divergence treatment, from GNU R
+        if(Math.abs(q - ch) > 0.1 * Math.abs(ch)) {
+          ch = ((ch < q) ? 0.9 : 1.1) * q;
+        }
+      }
+      LoggingUtil.warning("No convergence in AS 91 Gamma probit.");
+      // no convergence in MAXIT iterations -- but we add Newton now...
+    }
+    double x = 0.5 * ch / theta;
+    if(max_newton_iterations > 0) {
+      // Refine result using final Newton steps.
+      // TODO: add unit tests that show an improvement! Maybe in logscale only?
+      x = gammaQuantileNewtonRefinement(Math.log(p), k, theta, max_newton_iterations, x);
+    }
+    return x;
+  }
+
+  /**
+   * Refinement of ChiSquared probit using Newton iterations.
+   * 
+   * A trick used by GNU R to improve precision.
+   * 
+   * @param logpt Target value of log p
+   * @param k Alpha
+   * @param theta Theta = 1 / Beta
+   * @param maxit Maximum number of iterations to do
+   * @param x Initial estimate
+   * @return Refined value
+   */
+  protected static double gammaQuantileNewtonRefinement(final double logpt, final double k, final double theta, final int maxit, double x) {
+    final double EPS_N = 1e-15; // Precision threshold
+    // 0 is not possible, try MIN_NORMAL instead
+    if(x <= 0) {
+      x = Double.MIN_NORMAL;
+    }
+    // Current estimation
+    double logpc = logcdf(x, k, theta);
+    if(x == Double.MIN_NORMAL && logpc > logpt * (1. + 1e-7)) {
+      return 0.;
+    }
+    if(logpc == Double.NEGATIVE_INFINITY) {
+      return 0.;
+    }
+    // Refine by newton iterations
+    for(int i = 0; i < maxit; i++) {
+      // Error of current approximation
+      final double logpe = logpc - logpt;
+      if(Math.abs(logpe) < Math.abs(EPS_N * logpt)) {
+        break;
+      }
+      // Step size is controlled by PDF:
+      final double g = logpdf(x, k, theta);
+      if(g == Double.NEGATIVE_INFINITY) {
+        break;
+      }
+      final double newx = x - logpe * Math.exp(logpc - g);
+      // New estimate:
+      logpc = logcdf(newx, k, theta);
+      if(Math.abs(logpc - logpt) > Math.abs(logpe) || (i > 0 && Math.abs(logpc - logpt) == Math.abs(logpe))) {
+        // no further improvement
+        break;
+      }
+      x = newx;
+    }
+    return x;
+  }
+
+  /**
+   * Compute the Psi / Digamma function
+   * 
+   * Reference:
+   * <p>
+   * J. M. Bernando<br />
+   * Algorithm AS 103: Psi (Digamma) Function<br />
+   * Statistical Algorithms
+   * </p>
+   * 
+   * TODO: is there a more accurate version maybe in R?
+   * 
+   * @param x Position
+   * @return digamma value
+   */
+  @Reference(authors = "J. M. Bernando", title = "Algorithm AS 103: Psi (Digamma) Function", booktitle = "Statistical Algorithms")
+  public static double digamma(double x) {
+    if(!(x > 0)) {
+      return Double.NaN;
+    }
+    // Method of equation 5:
+    if(x <= 1e-5) {
+      return -EULERS_CONST - 1. / x;
+    }
+    // Method of equation 4:
+    else if(x > 49.) {
+      final double ix2 = 1. / (x * x);
+      // Partial series expansion
+      return Math.log(x) - 0.5 / x - ix2 * ((1.0 / 12.) + ix2 * (1.0 / 120. - ix2 / 252.));
+      // + O(x^8) error
+    }
+    else {
+      // Stirling expansion
+      return digamma(x + 1.) - 1. / x;
+    }
+  }
+
+  /**
+   * Compute the Trigamma function. Based on digamma.
+   * 
+   * TODO: is there a more accurate version maybe in R?
+   * 
+   * @param x Position
+   * @return trigamma value
+   */
+  public static double trigamma(double x) {
+    if(!(x > 0)) {
+      return Double.NaN;
+    }
+    // Method of equation 5:
+    if(x <= 1e-5) {
+      return 1. / (x * x);
+    }
+    // Method of equation 4:
+    else if(x > 49.) {
+      final double ix2 = 1. / (x * x);
+      // Partial series expansion
+      return 1 / x - ix2 / 2. + ix2 / x * (1.0 / 6. - ix2 * (1.0 / 30. + ix2 / 42.));
+      // + O(x^8) error
+    }
+    else {
+      // Stirling expansion
+      return trigamma(x + 1.) - 1. / (x * x);
+    }
+  }
+
+  /**
+   * Mean least squares estimation of Gamma distribution to a set of
+   * observations.
+   * 
+   * @param data Data
+   * @return Assumed distribution
+   */
+  public static GammaDistribution estimate(double[] data) {
+    return estimate(data, data.length);
+  }
+  
+  /**
+   * Mean least squares estimation of Gamma distribution to a set of
+   * observations.
+   * 
+   * Reference:
+   * <p>
+   * Maximum likelihood estimation of the parameters of the gamma distribution
+   * and their bias<br />
+   * S. C. Choi, R. Wette<br />
+   * in: Technometrics
+   * </p>
+   * 
+   * @param data Data
+   * @param len Length of array
+   * @return Assumed distribution
+   */
+  @Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892")
+  public static GammaDistribution estimate(double[] data, int len) {
+    double meanx = 0, meanlogx = 0;
+    for(int i = 0; i < len; i++) {
+      final double logx = Math.log(data[i]);
+      final double deltax = data[i] - meanx;
+      final double deltalogx = logx - meanlogx;
+      meanx += deltax / (i + 1.);
+      meanlogx += deltalogx / (i + 1.);
+    }
+    // Initial approximation
+    final double logmeanx = Math.log(meanx);
+    final double diff = logmeanx - meanlogx;
+    double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff);
+
+    // Refine via newton iteration, based on Choi and Wette equation
+    while(true) {
+      double kdelta = (Math.log(k) - digamma(k) - diff) / (1 / k - trigamma(k));
+      if(Math.abs(kdelta) < 1E-8 || Double.isNaN(kdelta)) {
+        break;
+      }
+      k += kdelta;
+    }
+    // Estimate theta:
+    final double theta = k / meanx;
+    return new GammaDistribution(k, theta);
+  }
 }
 \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
index 919cc2e3..9180b59e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
  * 
  * @author Erich Schubert
  */
-public class NormalDistribution implements Distribution {
+public class NormalDistribution implements DistributionWithRandom {
   /**
    * Coefficients for erf approximation.
    * 
@@ -148,20 +148,25 @@ public class NormalDistribution implements Distribution {
   public double pdf(double val) {
     return pdf(val, mean, stddev);
   }
-  
+
   @Override
   public double cdf(double val) {
     return cdf(val, mean, stddev);
   }
 
   @Override
+  public double quantile(double q) {
+    return quantile(q, mean, stddev);
+  }
+
+  @Override
   public double nextRandom() {
     return mean + random.nextGaussian() * stddev;
   }
 
   @Override
   public String toString() {
-    return "Normal Distribution (mean="+mean+", stddev="+stddev+")";
+    return "NormalDistribution(mean=" + mean + ", stddev=" + stddev + ")";
   }
 
   /**
@@ -195,7 +200,7 @@ public class NormalDistribution implements Distribution {
     if(Double.isInfinite(x)) {
       return (x < 0.0) ? 2 : 0;
     }
-  
+
     double result = Double.NaN;
     double absx = Math.abs(x);
     // First approximation interval
@@ -249,7 +254,7 @@ public class NormalDistribution implements Distribution {
    * @return erfinv(x)
    */
   public static double erfinv(double x) {
-    return standardNormalProbit(0.5 * (x + 1)) / MathUtil.SQRT2;
+    return standardNormalQuantile(0.5 * (x + 1)) / MathUtil.SQRT2;
   }
 
   /**
@@ -261,10 +266,13 @@ public class NormalDistribution implements Distribution {
    * by Peter John Acklam
    * </p>
    * 
+   * FIXME: precision of this seems to be rather low, compared to our other
+   * functions. Only about 8-9 digits agree with SciPy/GNU R.
+   * 
    * @param d Quantile. Must be in [0:1], obviously.
    * @return Inverse erf.
    */
-  public static double standardNormalProbit(double d) {
+  public static double standardNormalQuantile(double d) {
     if(d == 0) {
       return Double.NEGATIVE_INFINITY;
     }
@@ -319,7 +327,7 @@ public class NormalDistribution implements Distribution {
    * @return The CDF of the normal given distribution at x.
    */
   public static double cdf(double x, double mu, double sigma) {
-    return (1 + erf(x / Math.sqrt(2))) / 2;
+    return .5 * (1 + erf((x - mu) / (MathUtil.SQRT2 * sigma)));
   }
 
   /**
@@ -331,7 +339,7 @@ public class NormalDistribution implements Distribution {
    * @param sigma Standard deviation.
    * @return The probit of the normal given distribution at x.
    */
-  public static double probit(double x, double mu, double sigma) {
-    return mu + sigma * standardNormalProbit(x);
+  public static double quantile(double x, double mu, double sigma) {
+    return mu + sigma * standardNormalQuantile(x);
   }
 }
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
new file mode 100644
index 00000000..53fb0dc8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
@@ -0,0 +1,411 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * INCOMPLETE implementation of the poisson distribution.
+ * 
+ * TODO: continue implementing, CDF, invcdf and nextRandom are missing
+ * 
+ * References:
+ * <p>
+ * Catherine Loader<br />
+ * Fast and Accurate Computation of Binomial Probabilities.
+ * </p>
+ * 
+ * @author Erich Schubert
+ */
+public class PoissonDistribution implements Distribution {
+  /**
+   * Number of tries
+   */
+  private int n;
+
+  /**
+   * Success probability
+   */
+  private double p;
+
+  /** Stirling error constants: 1./12 */
+  private final static double S0 = 0.08333333333333333333333d;
+
+  /** Stirling error constants: 1./360 */
+  private final static double S1 = 0.0027777777777777777777777777778d;
+
+  /** Stirling error constants: 1./1260 */
+  private final static double S2 = 0.00079365079365079365079365d;
+
+  /** Stirling error constants: 1./1680 */
+  private final static double S3 = 0.000595238095238095238095238095d;
+
+  /** Stirling error constants: 1./1188 */
+  private final static double S4 = 0.00084175084175084175084175084175d;
+
+  /**
+   * Exact table values for n <= 15 in steps of 0.5
+   * 
+   * sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) )
+   */
+  private final static double STIRLING_EXACT_ERROR[] = {//
+  0.0, // 0.0
+  0.1534264097200273452913848, // 0.5
+  0.0810614667953272582196702, // 1.0
+  0.0548141210519176538961390, // 1.5
+  0.0413406959554092940938221, // 2.0
+  0.03316287351993628748511048, // 2.5
+  0.02767792568499833914878929, // 3.0
+  0.02374616365629749597132920, // 3.5
+  0.02079067210376509311152277, // 4.0
+  0.01848845053267318523077934, // 4.5
+  0.01664469118982119216319487, // 5.0
+  0.01513497322191737887351255, // 5.5
+  0.01387612882307074799874573, // 6.0
+  0.01281046524292022692424986, // 6.5
+  0.01189670994589177009505572, // 7.0
+  0.01110455975820691732662991, // 7.5
+  0.010411265261972096497478567, // 8.0
+  0.009799416126158803298389475, // 8.5
+  0.009255462182712732917728637, // 9.0
+  0.008768700134139385462952823, // 9.5
+  0.008330563433362871256469318, // 10.0
+  0.007934114564314020547248100, // 10.5
+  0.007573675487951840794972024, // 11.0
+  0.007244554301320383179543912, // 11.5
+  0.006942840107209529865664152, // 12.0
+  0.006665247032707682442354394, // 12.5
+  0.006408994188004207068439631, // 13.0
+  0.006171712263039457647532867, // 13.5
+  0.005951370112758847735624416, // 14.0
+  0.005746216513010115682023589, // 14.5
+  0.0055547335519628013710386900 // 15.0
+  };
+
+  /**
+   * Constructor.
+   * 
+   * Private: API not yet completely implemented!
+   * 
+   * @param n Number of tries
+   * @param p Success probability
+   */
+  public PoissonDistribution(int n, double p) {
+    super();
+    this.n = n;
+    this.p = p;
+  }
+
+  /**
+   * Poisson PMF for integer values.
+   * 
+   * @param x integer values
+   * @return Probability
+   */
+  @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+  public double pmf(int x) {
+    // Invalid values
+    if(x < 0 || x > n) {
+      return 0.0;
+    }
+    // Extreme probabilities
+    if(p <= 0d) {
+      return x == 0 ? 1.0 : 0.0;
+    }
+    if(p >= 1d) {
+      return x == n ? 1.0 : 0.0;
+    }
+    // Extreme values of x
+    if(x == 0) {
+      if(p < 0.1) {
+        return Math.exp(-devianceTerm(n, n * (1.0 - p)) - n * p);
+      }
+      else {
+        return Math.exp(n * Math.log(1.0 - p));
+      }
+    }
+    if(x == n) {
+      if(p > 0.9) {
+        return Math.exp(-devianceTerm(n, n * p) - n * (1 - p));
+      }
+      else {
+        return Math.exp(n * Math.log(p));
+      }
+    }
+
+    final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * (1.0 - p));
+    final double f = (MathUtil.TWOPI * x * (n - x)) / n;
+    return Math.exp(lc) / Math.sqrt(f);
+  }
+
+  @Override
+  @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+  public double pdf(double x) {
+    // Invalid values
+    if(x < 0 || x > n) {
+      return 0.0;
+    }
+    // Extreme probabilities
+    if(p <= 0d) {
+      return x == 0 ? 1.0 : 0.0;
+    }
+    if(p >= 1d) {
+      return x == n ? 1.0 : 0.0;
+    }
+    final double q = 1 - p;
+    // FIXME: check for x to be integer, return 0 otherwise?
+
+    // Extreme values of x
+    if(x == 0) {
+      if(p < 0.1) {
+        return Math.exp(-devianceTerm(n, n * q) - n * p);
+      }
+      else {
+        return Math.exp(n * Math.log(q));
+      }
+    }
+    if(x == n) {
+      if(p > 0.9) {
+        return Math.exp(-devianceTerm(n, n * p) - n * q);
+      }
+      else {
+        return Math.exp(n * Math.log(p));
+      }
+    }
+    final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * q);
+    final double f = (MathUtil.TWOPI * x * (n - x)) / n;
+    return Math.exp(lc) / Math.sqrt(f);
+  }
+
+  // FIXME: implement!
+  @Override
+  public double cdf(double val) {
+    throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+  }
+
+  // FIXME: implement!
+  @Override
+  public double quantile(double val) {
+    throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+  }
+
+  /**
+   * Compute the poisson distribution PDF with an offset of + 1
+   * 
+   * pdf(x_plus_1 - 1, lambda)
+   * 
+   * @param x_plus_1 x+1
+   * @param lambda Lambda
+   * @return pdf
+   */
+  public static double poissonPDFm1(double x_plus_1, double lambda) {
+    if(Double.isInfinite(lambda)) {
+      return 0.;
+    }
+    if(x_plus_1 > 1) {
+      return rawProbability(x_plus_1 - 1, lambda);
+    }
+    if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+      return Math.exp(-lambda - GammaDistribution.logGamma(x_plus_1));
+    }
+    else {
+      return rawProbability(x_plus_1, lambda) * (x_plus_1 / lambda);
+    }
+  }
+
+  /**
+   * Compute the poisson distribution PDF with an offset of + 1
+   * 
+   * log pdf(x_plus_1 - 1, lambda)
+   * 
+   * @param x_plus_1 x+1
+   * @param lambda Lambda
+   * @return pdf
+   */
+  public static double logpoissonPDFm1(double x_plus_1, double lambda) {
+    if(Double.isInfinite(lambda)) {
+      return Double.NEGATIVE_INFINITY;
+    }
+    if(x_plus_1 > 1) {
+      return rawLogProbability(x_plus_1 - 1, lambda);
+    }
+    if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+      return -lambda - GammaDistribution.logGamma(x_plus_1);
+    }
+    else {
+      return rawLogProbability(x_plus_1, lambda) + Math.log(x_plus_1 / lambda);
+    }
+  }
+
+  /**
+   * Calculates the Striling Error
+   * 
+   * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
+   * 
+   * @param n Parameter n
+   * @return Stirling error
+   */
+  @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+  private static double stirlingError(int n) {
+    // Try to use a table value:
+    if(n < 16) {
+      return STIRLING_EXACT_ERROR[n * 2];
+    }
+    final double nn = n * n;
+    // Use the appropriate number of terms
+    if(n > 500) {
+      return (S0 - S1 / nn) / n;
+    }
+    if(n > 80) {
+      return ((S0 - (S1 - S2 / nn)) / nn) / n;
+    }
+    if(n > 35) {
+      return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
+    }
+    return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
+  }
+
+  /**
+   * Calculates the Striling Error
+   * 
+   * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
+   * 
+   * @param n Parameter n
+   * @return Stirling error
+   */
+  @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+  private static double stirlingError(double n) {
+    if(n < 16.0) {
+      // Our table has a step size of 0.5
+      final double n2 = 2.0 * n;
+      if(Math.floor(n2) == n2) { // Exact match
+        return STIRLING_EXACT_ERROR[(int) n2];
+      }
+      else {
+        return GammaDistribution.logGamma(n + 1.0) - (n + 0.5) * Math.log(n) + n - MathUtil.LOGSQRTTWOPI;
+      }
+    }
+    final double nn = n * n;
+    if(n > 500.0) {
+      return (S0 - S1 / nn) / n;
+    }
+    if(n > 80.0) {
+      return ((S0 - (S1 - S2 / nn)) / nn) / n;
+    }
+    if(n > 35.0) {
+      return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
+    }
+    return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
+  }
+
+  /**
+   * Evaluate the deviance term of the saddle point approximation.
+   * 
+   * bd0(x,np) = x*ln(x/np)+np-x
+   * 
+   * @param x probability density function position
+   * @param np product of trials and success probability: n*p
+   * @return Deviance term
+   */
+  @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+  private static double devianceTerm(double x, double np) {
+    if(Math.abs(x - np) < 0.1 * (x + np)) {
+      final double v = (x - np) / (x + np);
+
+      double s = (x - np) * v;
+      double ej = 2.0d * x * v;
+      for(int j = 1;; j++) {
+        ej *= v * v;
+        final double s1 = s + ej / (2 * j + 1);
+        if(s1 == s) {
+          return s1;
+        }
+        s = s1;
+      }
+    }
+    return x * Math.log(x / np) + np - x;
+  }
+
+  /**
+   * Poisson distribution probability, but also for non-integer arguments.
+   * 
+   * lb^x exp(-lb) / x!
+   * 
+   * @param x X
+   * @param lambda lambda
+   * @return Poisson distribution probability
+   */
+  public static double rawProbability(double x, double lambda) {
+    // Extreme lambda
+    if(lambda == 0) {
+      return ((x == 0) ? 1. : 0.);
+    }
+    // Extreme values
+    if(Double.isInfinite(lambda) || x < 0) {
+      return 0.;
+    }
+    if(x <= lambda * Double.MIN_NORMAL) {
+      return Math.exp(-lambda);
+    }
+    if(lambda < x * Double.MIN_NORMAL) {
+      double r = -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
+      return Math.exp(r);
+    }
+    final double f = MathUtil.TWOPI * x;
+    final double y = -stirlingError(x) - devianceTerm(x, lambda);
+    return Math.exp(y) / Math.sqrt(f);
+  }
+
+  /**
+   * Poisson distribution probability, but also for non-integer arguments.
+   * 
+   * lb^x exp(-lb) / x!
+   * 
+   * @param x X
+   * @param lambda lambda
+   * @return Poisson distribution probability
+   */
+  public static double rawLogProbability(double x, double lambda) {
+    // Extreme lambda
+    if(lambda == 0) {
+      return ((x == 0) ? 1. : Double.NEGATIVE_INFINITY);
+    }
+    // Extreme values
+    if(Double.isInfinite(lambda) || x < 0) {
+      return Double.NEGATIVE_INFINITY;
+    }
+    if(x <= lambda * Double.MIN_NORMAL) {
+      return -lambda;
+    }
+    if(lambda < x * Double.MIN_NORMAL) {
+      return -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
+    }
+    final double f = MathUtil.TWOPI * x;
+    final double y = -stirlingError(x) - devianceTerm(x, lambda);
+    return -0.5 * Math.log(f) + y;
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
new file mode 100644
index 00000000..2e9e0d15
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
@@ -0,0 +1,90 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/**
+ * Student's t distribution.
+ * 
+ * FIXME: add quantile function!
+ * 
+ * @author Jan Brusis
+ */
+public class StudentsTDistribution implements Distribution {
+  /**
+   * Degrees of freedom
+   */
+  private final int v;
+
+  /**
+   * Constructor.
+   * 
+   * @param v Degrees of freedom
+   */
+  public StudentsTDistribution(int v) {
+    this.v = v;
+  }
+
+  @Override
+  public double pdf(double val) {
+    return pdf(val, v);
+  }
+
+  @Override
+  public double cdf(double val) {
+    return cdf(val, v);
+  }
+
+  // FIXME: implement!
+  @Override
+  public double quantile(double val) {
+    throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+  }
+
+  /**
+   * Static version of the t distribution's PDF.
+   * 
+   * @param val value to evaluate
+   * @param v degrees of freedom
+   * @return f(val,v)
+   */
+  public static double pdf(double val, int v) {
+    // TODO: improve precision by computing "exp" last?
+    return Math.exp(GammaDistribution.logGamma((v + 1) / 2) - GammaDistribution.logGamma(v / 2)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) / 2));
+  }
+
+  /**
+   * Static version of the CDF of the t-distribution for t > 0
+   * 
+   * @param val value to evaluate
+   * @param v degrees of freedom
+   * @return F(val, v)
+   */
+  public static double cdf(double val, int v) {
+    double x = v / (val * val + v);
+    return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v / 2, 0.5));
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
index 9571cfd3..4f54fbf9 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
@@ -30,7 +30,7 @@ import java.util.Random;
  * 
  * @author Erich Schubert
  */
-public class UniformDistribution implements Distribution {
+public class UniformDistribution implements DistributionWithRandom {
   /**
    * Minimum
    */
@@ -100,20 +100,20 @@ public class UniformDistribution implements Distribution {
     }
     return (val - min) / len;
   }
+  
+  @Override
+  public double quantile(double val) {
+    return min + len * val; 
+  }
 
   @Override
   public double nextRandom() {
     return min + random.nextDouble() * len;
   }
 
-  /**
-   * Simple toString explaining the distribution parameters.
-   * 
-   * Used in describing cluster models.
-   */
   @Override
   public String toString() {
-    return "Uniform Distribution (min=" + min + ", max=" + max + ")";
+    return "UniformDistribution(min=" + min + ", max=" + max + ")";
   }
 
   /**
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java
new file mode 100644
index 00000000..f50f469f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
+
+/**
+ * Interface for the statistical test used by HiCS.
+ * 
+ * Provides a single method that calculates the deviation between two data
+ * samples, given as arrays of double values
+ * 
+ * @author Jan Brusis
+ * @author Erich Schubert
+ */
+public interface GoodnessOfFitTest extends Parameterizable {
+  /**
+   * Measure the deviation of a full sample from a conditional sample.
+   * 
+   * Sample arrays *may* be modified, e.g. sorted, by the test.
+   * 
+   * @param fullSample Full sample
+   * @param conditionalSample Conditional sample
+   * 
+   * @return Deviation
+   */
+  public double deviation(double[] fullSample, double[] conditionalSample);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java
new file mode 100644
index 00000000..236f26b8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java
@@ -0,0 +1,117 @@
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Kolmogorov-Smirnov test.
+ * 
+ * Class that tests two given real-valued data samples on whether they might
+ * have originated from the same underlying distribution using the
+ * Kolmogorov-Smirnov test statistic that compares the two empirical cumulative
+ * distribution functions. The KS statistic is defined as the maximum absolute
+ * difference of the empirical CDFs.
+ * 
+ * @author Erich Schubert
+ */
+public class KolmogorovSmirnovTest implements GoodnessOfFitTest {
+  /**
+   * Static instance
+   */
+  public static KolmogorovSmirnovTest STATIC = new KolmogorovSmirnovTest();
+
+  /**
+   * Constructor.
+   */
+  public KolmogorovSmirnovTest() {
+    super();
+  }
+
+  @Override
+  public double deviation(double[] fullSample, double[] conditionalSample) {
+    Arrays.sort(fullSample);
+    Arrays.sort(conditionalSample);
+    return calculateTestStatistic(fullSample, conditionalSample);
+  }
+
+  /**
+   * Calculates the maximum distance between the two empirical CDFs of two data
+   * samples. The sample positions and CDFs must be synchronized!
+   * 
+   * @param sample1 first data sample positions
+   * @param sample2 second data sample positions
+   * @return the largest distance between both functions
+   */
+  public static double calculateTestStatistic(double[] sample1, double[] sample2) {
+    double maximum = 0.0;
+
+    int index1 = 0, index2 = 0;
+    double cdf1 = 0, cdf2 = 0;
+
+    // Parallel iteration over both curves. We can stop if we reach either end,
+    // As the difference can then only decrease!
+    while(index1 < sample1.length && index2 < sample2.length) {
+      // Next (!) positions
+      final double x1 = sample1[index1], x2 = sample2[index2];
+      // Advance on first curve
+      if(x1 <= x2) {
+        index1++;
+        // Handle multiple points with same x:
+        while(index1 < sample1.length && sample1[index1] == x1) {
+          index1++;
+        }
+        cdf1 = ((double) index1) / sample1.length;
+      }
+      // Advance on second curve
+      if(x1 >= x2) {
+        index2++;
+        // Handle multiple points with same x:
+        while(index2 < sample2.length && sample2[index2] == x2) {
+          index2++;
+        }
+        cdf2 = ((double) index2) / sample2.length;
+      }
+      maximum = Math.max(maximum, Math.abs(cdf1 - cdf2));
+    }
+
+    return maximum;
+  }
+
+  /**
+   * Parameterizer, to use the static instance.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected KolmogorovSmirnovTest makeInstance() {
+      return STATIC;
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/WelchTTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/WelchTTest.java
new file mode 100644
index 00000000..554a22db
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/WelchTTest.java
@@ -0,0 +1,124 @@
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.StudentsTDistribution;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Calculates a test statistic according to Welch's t test for two samples
+ * Supplies methods for calculating the degrees of freedom according to the
+ * Welch-Satterthwaite Equation. Also directly calculates a two-sided p-value
+ * for the underlying t-distribution
+ * 
+ * @author Jan Brusis
+ * @author Erich Schubert
+ * 
+ * @apiviz.uses StudentsTDistribution
+ */
+public class WelchTTest implements GoodnessOfFitTest {
+  /**
+   * Static instance.
+   */
+  public static final WelchTTest STATIC = new WelchTTest();
+
+  /**
+   * Constructor.
+   */
+  public WelchTTest() {
+    super();
+  }
+
+  @Override
+  public double deviation(double[] sample1, double[] sample2) {
+    MeanVariance mv1 = new MeanVariance(), mv2 = new MeanVariance();
+    for(double d : sample1) {
+      mv1.put(d);
+    }
+    for(double d : sample2) {
+      mv2.put(d);
+    }
+
+    final double t = calculateTestStatistic(mv1, mv2);
+    final int v = calculateDOF(mv1, mv2);
+    return 1 - calculatePValue(t, v);
+  }
+
+  /**
+   * Calculate the statistic of Welch's t test using statistical moments of the
+   * provided data samples
+   * 
+   * @param mv1 Mean and variance of first sample
+   * @param mv2 Mean and variance of second sample
+   * @return Welch's t statistic
+   */
+  public static double calculateTestStatistic(MeanVariance mv1, MeanVariance mv2) {
+    final double delta = mv1.getMean() - mv2.getMean();
+    final double relvar1 = mv1.getSampleVariance() / mv1.getCount();
+    final double relvar2 = mv2.getSampleVariance() / mv2.getCount();
+    return delta / Math.sqrt(relvar1 + relvar2);
+  }
+
+  /**
+   * Calculates the degree of freedom according to Welch-Satterthwaite
+   * 
+   * @param mv1 Mean and variance of first sample
+   * @param mv2 Mean and variance of second sample
+   * @return Estimated degrees of freedom.
+   */
+  public static int calculateDOF(MeanVariance mv1, MeanVariance mv2) {
+    final double relvar1 = mv1.getSampleVariance() / mv1.getCount();
+    final double relvar2 = mv2.getSampleVariance() / mv2.getCount();
+    final double wvariance = relvar1 + relvar2;
+    final double div = relvar1 * relvar1 / (mv1.getCount() - 1) + relvar2 * relvar2 / (mv2.getCount() - 1);
+    return (int) (wvariance * wvariance / div);
+  }
+
+  /**
+   * Calculates the two-sided p-Value of the underlying t-Distribution with v
+   * degrees of freedom
+   * 
+   * @param t Integration limit
+   * @param v Degrees of freedom
+   * @return p-Value
+   */
+  public static double calculatePValue(double t, int v) {
+    return 2 * (1 - StudentsTDistribution.cdf(Math.abs(t), v));
+  }
+
+  /**
+   * Parameterizer, to use the static instance.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected WelchTTest makeInstance() {
+      return STATIC;
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java
new file mode 100644
index 00000000..3e39a519
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>Statistical tests.</p> 
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
+\ No newline at end of file