summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java499
1 files changed, 499 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
new file mode 100644
index 00000000..1efe19dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
@@ -0,0 +1,499 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Beta Distribution with implementation of the regularized incomplete beta
+ * function
+ *
+ * @author Jan Brusis
+ * @author Erich Schubert
+ */
+public class BetaDistribution implements DistributionWithRandom {
+ /**
+ * Numerical precision to use
+ */
+ static final double NUM_PRECISION = 1E-15;
+
+ /**
+ * Limit of when to switch to quadrature method
+ */
+ static final double SWITCH = 3000;
+
+ /**
+ * Abscissas for Gauss-Legendre quadrature
+ */
+ static final double[] GAUSSLEGENDRE_Y = { 0.0021695375159141994, 0.011413521097787704, 0.027972308950302116, 0.051727015600492421, 0.082502225484340941, 0.12007019910960293, 0.16415283300752470, 0.21442376986779355, 0.27051082840644336, 0.33199876341447887, 0.39843234186401943, 0.46931971407375483, 0.54413605556657973, 0.62232745288031077, 0.70331500465597174, 0.78649910768313447, 0.87126389619061517, 0.95698180152629142 };
+
+ /**
+ * Weights for Gauss-Legendre quadrature
+ */
+ static final double[] GAUSSLEGENDRE_W = { 0.0055657196642445571, 0.012915947284065419, 0.020181515297735382, 0.027298621498568734, 0.034213810770299537, 0.040875750923643261, 0.047235083490265582, 0.053244713977759692, 0.058860144245324798, 0.064039797355015485, 0.068745323835736408, 0.072941885005653087, 0.076598410645870640, 0.079687828912071670, 0.082187266704339706, 0.084078218979661945, 0.085346685739338721, 0.085983275670394821 };
+
+ /**
+ * Shape parameter of beta distribution
+ */
+ private final double alpha;
+
+ /**
+ * Shape parameter of beta distribution
+ */
+ private final double beta;
+
+ /**
+ * For random number generation
+ */
+ private Random random;
+
+ /**
+ * Log beta(a, b) cache
+ */
+ private double logbab;
+
+ /**
+ * Constructor.
+ *
+ * @param a shape Parameter a
+ * @param b shape Parameter b
+ */
+ public BetaDistribution(double a, double b) {
+ this(a, b, new Random());
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param a shape Parameter a
+ * @param b shape Parameter b
+ * @param random Random generator
+ */
+ public BetaDistribution(double a, double b, Random random) {
+ super();
+ if(a <= 0.0 || b <= 0.0) {
+ throw new IllegalArgumentException("Invalid parameters for Beta distribution.");
+ }
+
+ this.alpha = a;
+ this.beta = b;
+ this.logbab = logBeta(a, b);
+ this.random = random;
+ }
+
+ @Override
+ public double pdf(double val) {
+ if(val < 0. || val > 1.) {
+ return 0.;
+ }
+ if(val == 0.) {
+ if(alpha > 1.) {
+ return 0.;
+ }
+ if(alpha < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return beta;
+ }
+ if(val == 1.) {
+ if(beta > 1.) {
+ return 0.;
+ }
+ if(beta < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return alpha;
+ }
+ return Math.exp(-logbab + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+ }
+
+ @Override
+ public double cdf(double x) {
+ if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ if(alpha > SWITCH && beta > SWITCH) {
+ return regularizedIncBetaQuadrature(alpha, beta, x);
+ }
+ double bt = Math.exp(-logbab + alpha * Math.log(x) + beta * Math.log1p(-x));
+ if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+ }
+ else {
+ return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+ }
+ }
+
+ @Override
+ public double quantile(double x) {
+ // Valid parameters
+ if(x < 0 || x > 1 || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x == 0) {
+ return 0.0;
+ }
+ if(x == 1) {
+ return 1.0;
+ }
+ // Simpler to compute inverse?
+ if(x > 0.5) {
+ return 1 - rawQuantile(1 - x, beta, alpha, logbab);
+ }
+ else {
+ return rawQuantile(x, alpha, beta, logbab);
+ }
+ }
+
+ @Override
+ public double nextRandom() {
+ double x = GammaDistribution.nextRandom(alpha, 1, random);
+ double y = GammaDistribution.nextRandom(beta, 1, random);
+ return x / (x + y);
+ }
+
+ @Override
+ public String toString() {
+ return "BetaDistribution(alpha=" + alpha + ", beta=" + beta + ")";
+ }
+
+ /**
+ * Static version of the CDF of the beta distribution
+ *
+ * @param val Value
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return cumulative density
+ */
+ public static double cdf(double val, double alpha, double beta) {
+ return regularizedIncBeta(val, alpha, beta);
+ }
+
+ /**
+ * Static version of the PDF of the beta distribution
+ *
+ * @param val Value
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return probability density
+ */
+ public static double pdf(double val, double alpha, double beta) {
+ if(alpha <= 0. || beta <= 0. || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(val)) {
+ return Double.NaN;
+ }
+ if(val < 0. || val > 1.) {
+ return 0.;
+ }
+ if(val == 0.) {
+ if(alpha > 1.) {
+ return 0.;
+ }
+ if(alpha < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return beta;
+ }
+ if(val == 1.) {
+ if(beta > 1.) {
+ return 0.;
+ }
+ if(beta < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return alpha;
+ }
+ return Math.exp(-logBeta(alpha, beta) + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+ }
+
+ /**
+ * Compute log beta(a,b)
+ *
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return Logarithm of result
+ */
+ public static double logBeta(double alpha, double beta) {
+ return GammaDistribution.logGamma(alpha) + GammaDistribution.logGamma(beta) - GammaDistribution.logGamma(alpha + beta);
+ }
+
+ /**
+ * Computes the regularized incomplete beta function I_x(a, b) which is also
+ * the CDF of the beta distribution. Based on the book "Numerical Recipes"
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return Value of the regularized incomplete beta function
+ */
+ public static double regularizedIncBeta(double x, double alpha, double beta) {
+ if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ if(alpha > SWITCH && beta > SWITCH) {
+ return regularizedIncBetaQuadrature(alpha, beta, x);
+ }
+ double bt = Math.exp(-logBeta(alpha, beta) + alpha * Math.log(x) + beta * Math.log1p(-x));
+ if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+ }
+ else {
+ return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+ }
+ }
+
+ /**
+ * Returns the regularized incomplete beta function I_x(a, b) Includes the
+ * continued fraction way of computing, based on the book "Numerical Recipes".
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return result
+ */
+ protected static double regularizedIncBetaCF(double alpha, double beta, double x) {
+ final double FPMIN = Double.MIN_VALUE / NUM_PRECISION;
+ double qab = alpha + beta;
+ double qap = alpha + 1.0;
+ double qam = alpha - 1.0;
+ double c = 1.0;
+ double d = 1.0 - qab * x / qap;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ d = 1.0 / d;
+ double h = d;
+ for(int m = 1; m < 10000; m++) {
+ int m2 = 2 * m;
+ double aa = m * (beta - m) * x / ((qam + m2) * (alpha + m2));
+ d = 1.0 + aa * d;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ c = 1.0 + aa / c;
+ if(Math.abs(c) < FPMIN) {
+ c = FPMIN;
+ }
+ d = 1.0 / d;
+ h *= d * c;
+ aa = -(alpha + m) * (qab + m) * x / ((alpha + m2) * (qap + m2));
+ d = 1.0 + aa * d;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ c = 1.0 + aa / c;
+ if(Math.abs(c) < FPMIN) {
+ c = FPMIN;
+ }
+ d = 1.0 / d;
+ double del = d * c;
+ h *= del;
+ if(Math.abs(del - 1.0) <= NUM_PRECISION) {
+ break;
+ }
+ }
+ return h;
+ }
+
+ /**
+ * Returns the regularized incomplete beta function I_x(a, b) by quadrature,
+ * based on the book "Numerical Recipes".
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return result
+ */
+ protected static double regularizedIncBetaQuadrature(double alpha, double beta, double x) {
+ double a1 = alpha - 1.0;
+ double b1 = beta - 1.0;
+ double mu = alpha / (alpha + beta);
+ double lnmu = Math.log(mu);
+ double lnmuc = Math.log1p(-mu);
+ double t = Math.sqrt(alpha * beta / ((alpha + beta) * (alpha + beta) * (alpha + beta + 1.0)));
+ double xu;
+ if(x > alpha / (alpha + beta)) {
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ xu = Math.min(1.0, Math.max(mu + 10.0 * t, x + 5.0 * t));
+ }
+ else {
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ xu = Math.max(0.0, Math.min(mu - 10.0 * t, x - 5.0 * t));
+ }
+ double sum = 0.0;
+ for(int i = 0; i < GAUSSLEGENDRE_Y.length; i++) {
+ t = x + (xu - x) * GAUSSLEGENDRE_Y[i];
+ sum += GAUSSLEGENDRE_W[i] * Math.exp(a1 * (Math.log(t) - lnmu) + b1 * (Math.log1p(-t) - lnmuc));
+ }
+ double ans = sum * (xu - x) * Math.exp(a1 * lnmu - GammaDistribution.logGamma(alpha) + b1 * lnmuc - GammaDistribution.logGamma(b1) + GammaDistribution.logGamma(alpha + beta));
+ return ans > 0 ? 1.0 - ans : -ans;
+ }
+
+ /**
+ * Compute quantile (inverse cdf) for Beta distributions.
+ *
+ * @param p Probability
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return Probit for Beta distribution
+ */
+ public static double quantile(double p, double alpha, double beta) {
+ // Valid parameters
+ if(Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(p) || alpha < 0. || beta < 0.) {
+ return Double.NaN;
+ }
+ if(p < 0 || p > 1) {
+ return Double.NaN;
+ }
+ if(p == 0) {
+ return 0.0;
+ }
+ if(p == 1) {
+ return 1.0;
+ }
+ // Simpler to compute inverse?
+ if(p > 0.5) {
+ return 1 - rawQuantile(1 - p, beta, alpha, logBeta(beta, alpha));
+ }
+ else {
+ return rawQuantile(p, alpha, beta, logBeta(alpha, beta));
+ }
+ }
+
+ /**
+ * Raw quantile function
+ *
+ * @param p P, must be 0 < p <= .5
+ * @param alpha Alpha
+ * @param beta Beta
+ * @param logbeta log Beta(alpha, beta)
+ * @return Position
+ */
+ protected static double rawQuantile(double p, double alpha, double beta, final double logbeta) {
+ // Initial estimate for x
+ double x;
+ {
+ // Very fast approximation of y.
+ double tmp = Math.sqrt(-2 * Math.log(p));
+ double y = tmp - (2.30753 + 0.27061 * tmp) / (1. + (0.99229 + 0.04481 * tmp) * tmp);
+
+ if(alpha > 1 && beta > 1) {
+ double r = (y * y - 3.) / 6.;
+ double s = 1. / (alpha + alpha - 1.);
+ double t = 1. / (beta + beta - 1.);
+ double h = 2. / (s + t);
+ double w = y * Math.sqrt(h + r) / h - (t - s) * (r + 5. / 6. - 2. / (3. * h));
+ x = alpha / (alpha + beta * Math.exp(w + w));
+ }
+ else {
+ double r = beta + beta;
+ double t = 1. / (9. * beta);
+ t = r * Math.pow(1. - t + y * Math.sqrt(t), 3.0);
+ if(t <= 0.) {
+ x = 1. - Math.exp((Math.log1p(-p) + Math.log(beta) + logbeta) / beta);
+ }
+ else {
+ t = (4. * alpha + r - 2.) / t;
+ if(t <= 1.) {
+ x = Math.exp((Math.log(p * alpha) + logbeta) / alpha);
+ }
+ else {
+ x = 1. - 2. / (t + 1.);
+ }
+ }
+ }
+ // Degenerate initial approximations
+ if(x < 3e-308 || x > 1 - 2.22e-16) {
+ x = 0.5;
+ }
+ }
+
+ // Newon-Raphson method using the CDF
+ {
+ final double ialpha = 1 - alpha;
+ final double ibeta = 1 - beta;
+
+ // Desired accuracy, as from GNU R adoption of AS 109
+ final double acu = Math.max(1e-300, Math.pow(10., -13 - 2.5 / (alpha * alpha) - .5 / (p * p)));
+ double prevstep = 0., y = 0., stepsize = 1;
+
+ for(int outer = 0; outer < 1000; outer++) {
+ // Current CDF value
+ double ynew = cdf(x, alpha, beta);
+ if(Double.isInfinite(ynew)) { // Degenerated.
+ return Double.NaN;
+ }
+ // Error gradient
+ ynew = (ynew - p) * Math.exp(logbeta + ialpha * Math.log(x) + ibeta * Math.log1p(-x));
+ if(ynew * y <= 0.) {
+ prevstep = Math.max(Math.abs(stepsize), 3e-308);
+ }
+ // Inner loop: try different step sizes: y * 3^-i
+ double g = 1, xnew = 0.;
+ for(int inner = 0; inner < 1000; inner++) {
+ stepsize = g * ynew;
+ if(Math.abs(stepsize) < prevstep) {
+ xnew = x - stepsize; // Candidate x
+ if(xnew >= 0. && xnew <= 1.) {
+ // Close enough
+ if(prevstep <= acu || Math.abs(ynew) <= acu) {
+ return x;
+ }
+ if(xnew != 0. && xnew != 1.) {
+ break;
+ }
+ }
+ }
+ g /= 3.;
+ }
+ // Convergence
+ if(Math.abs(xnew - x) < 1e-15 * x) {
+ return x;
+ }
+ // Iterate with new values
+ x = xnew;
+ y = ynew;
+ }
+ }
+ // Not converged in Newton-Raphson
+ throw new AbortException("Beta quantile computation did not converge.");
+ }
+} \ No newline at end of file