package de.lmu.ifi.dbs.elki.math.statistics.distribution;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import java.util.Random;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
* Gamma Distribution, with random generation and density functions.
*
* @author Erich Schubert
*/
public class GammaDistribution implements DistributionWithRandom {
/**
* Euler–Mascheroni constant
*/
public static final double EULERS_CONST = 0.5772156649015328606065120900824024;
/**
* LANCZOS-Coefficients for Gamma approximation.
*
* These are said to have higher precision than those in "Numerical Recipes".
* They probably come from
*
* Paul Godfrey: http://my.fit.edu/~gabdo/gamma.txt
*/
static final double[] LANCZOS = { 0.99999999999999709182, 57.156235665862923517, -59.597960355475491248, 14.136097974741747174, -0.49191381609762019978, .33994649984811888699e-4, .46523628927048575665e-4, -.98374475304879564677e-4, .15808870322491248884e-3, -.21026444172410488319e-3, .21743961811521264320e-3, -.16431810653676389022e-3, .84418223983852743293e-4, -.26190838401581408670e-4, .36899182659531622704e-5, };
/**
* Numerical precision to use
*/
static final double NUM_PRECISION = 1E-15;
/**
* Alpha == k
*/
private final double k;
/**
* Theta == 1 / Beta
*/
private final double theta;
/**
* The random generator.
*/
private Random random;
/**
* Constructor for Gamma distribution.
*
* @param k k, alpha aka. "shape" parameter
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
* @param random Random generator
*/
public GammaDistribution(double k, double theta, Random random) {
super();
if(k <= 0.0 || theta <= 0.0) {
throw new IllegalArgumentException("Invalid parameters for Gamma distribution.");
}
this.k = k;
this.theta = theta;
this.random = random;
}
/**
* Constructor for Gamma distribution.
*
* @param k k, alpha aka. "shape" parameter
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
*/
public GammaDistribution(double k, double theta) {
this(k, theta, new Random());
}
@Override
public double pdf(double val) {
return pdf(val, k, theta);
}
@Override
public double cdf(double val) {
return cdf(val, k, theta);
}
@Override
public double quantile(double val) {
return quantile(val, k, theta);
}
@Override
public double nextRandom() {
return nextRandom(k, theta, random);
}
/**
* Simple toString explaining the distribution parameters.
*
* Used in producing a model description.
*/
@Override
public String toString() {
return "GammaDistribution(k=" + k + ", theta=" + theta + ")";
}
/**
* @return the value of k
*/
public double getK() {
return k;
}
/**
* @return the standard deviation
*/
public double getTheta() {
return theta;
}
/**
* The CDF, static version.
*
* @param val Value
* @param k Shape k
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
* @return cdf value
*/
public static double cdf(double val, double k, double theta) {
return regularizedGammaP(k, val * theta);
}
/**
* The log CDF, static version.
*
* @param val Value
* @param k Shape k
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
* @return cdf value
*/
public static double logcdf(double val, double k, double theta) {
return logregularizedGammaP(k, val * theta);
}
/**
* Gamma distribution PDF (with 0.0 for x < 0)
*
* @param x query value
* @param k Alpha
* @param theta Theta = 1 / Beta
* @return probability density
*/
public static double pdf(double x, double k, double theta) {
if(x < 0) {
return 0.0;
}
if(x == 0) {
if(k == 1.0) {
return theta;
}
else {
return 0.0;
}
}
if(k == 1.0) {
return Math.exp(-x * theta) * theta;
}
return Math.exp((k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k)) * theta;
}
/**
* Gamma distribution PDF (with 0.0 for x < 0)
*
* @param x query value
* @param k Alpha
* @param theta Theta = 1 / Beta
* @return probability density
*/
public static double logpdf(double x, double k, double theta) {
if(x < 0) {
return Double.NEGATIVE_INFINITY;
}
if(x == 0) {
if(k == 1.0) {
return Math.log(theta);
}
else {
return Double.NEGATIVE_INFINITY;
}
}
if(k == 1.0) {
return Math.log(theta) - x * theta;
}
return Math.log(theta) + (k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k);
}
/**
* Compute logGamma.
*
* Based loosely on "Numerical Recpies" and the work of Paul Godfrey at
* http://my.fit.edu/~gabdo/gamma.txt
*
* TODO: find out which approximation really is the best...
*
* @param x Parameter x
* @return log(Γ(x))
*/
public static double logGamma(final double x) {
if(Double.isNaN(x) || (x <= 0.0)) {
return Double.NaN;
}
double g = 607.0 / 128.0;
double tmp = x + g + .5;
tmp = (x + 0.5) * Math.log(tmp) - tmp;
double ser = LANCZOS[0];
for(int i = LANCZOS.length - 1; i > 0; --i) {
ser += LANCZOS[i] / (x + i);
}
return tmp + Math.log(MathUtil.SQRTTWOPI * ser / x);
}
/**
* Compute the regular Gamma function.
*
* Note: for numerical reasons, it is preferrable to use {@link #logGamma}
* when possible! In particular, this method just computes
* {@code Math.exp(logGamma(x))} anyway.
*
* Try to postpone the {@code Math.exp} call to preserve numeric range!
*
* @param x Position
* @return Gamma at this position
*/
public static double gamma(double x) {
return Math.exp(logGamma(x));
}
/**
* Returns the regularized gamma function P(a, x).
*
* Includes the quadrature way of computing.
*
* TODO: find "the" most accurate version of this. We seem to agree with
* others for the first 10+ digits, but diverge a bit later than that.
*
* @param a Parameter a
* @param x Parameter x
* @return Gamma value
*/
public static double regularizedGammaP(final double a, final double x) {
// Special cases
if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
return Double.NaN;
}
if(x == 0.0) {
return 0.0;
}
if(x >= a + 1) {
// Expected to converge faster
return 1.0 - regularizedGammaQ(a, x);
}
// Loosely following "Numerical Recipes"
double del = 1.0 / a;
double sum = del;
for(int n = 1; n < Integer.MAX_VALUE; n++) {
// compute next element in the series
del *= x / (a + n);
sum = sum + del;
if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) {
break;
}
}
if(Double.isInfinite(sum)) {
return 1.0;
}
return Math.exp(-x + (a * Math.log(x)) - logGamma(a)) * sum;
}
/**
* Returns the regularized gamma function log P(a, x).
*
* Includes the quadrature way of computing.
*
* TODO: find "the" most accurate version of this. We seem to agree with
* others for the first 10+ digits, but diverge a bit later than that.
*
* @param a Parameter a
* @param x Parameter x
* @return Gamma value
*/
public static double logregularizedGammaP(final double a, final double x) {
// Special cases
if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
return Double.NaN;
}
if(x == 0.0) {
return Double.NEGATIVE_INFINITY;
}
if(x >= a + 1) {
// Expected to converge faster
// FIXME: and in log?
return Math.log(1.0 - regularizedGammaQ(a, x));
}
// Loosely following "Numerical Recipes"
double del = 1.0 / a;
double sum = del;
for(int n = 1; n < Integer.MAX_VALUE; n++) {
// compute next element in the series
del *= x / (a + n);
sum = sum + del;
if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) {
break;
}
}
if(Double.isInfinite(sum)) {
return 0;
}
// TODO: reread numerical recipes, can we replace log(sum)?
return -x + (a * Math.log(x)) - logGamma(a) + Math.log(sum);
}
/**
* Returns the regularized gamma function Q(a, x) = 1 - P(a, x).
*
* Includes the continued fraction way of computing, based loosely on the book
* "Numerical Recipes"; but probably not with the exactly same precision,
* since we reimplemented this in our coding style, not literally.
*
* TODO: find "the" most accurate version of this. We seem to agree with
* others for the first 10+ digits, but diverge a bit later than that.
*
* @param a parameter a
* @param x parameter x
* @return Result
*/
public static double regularizedGammaQ(final double a, final double x) {
if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
return Double.NaN;
}
if(x == 0.0) {
return 1.0;
}
if(x < a + 1.0) {
// Expected to converge faster
return 1.0 - regularizedGammaP(a, x);
}
// Compute using continued fraction approach.
final double FPMIN = Double.MIN_VALUE / NUM_PRECISION;
double b = x + 1 - a;
double c = 1.0 / FPMIN;
double d = 1.0 / b;
double fac = d;
for(int i = 1; i < Integer.MAX_VALUE; i++) {
double an = i * (a - i);
b += 2;
d = an * d + b;
if(Math.abs(d) < FPMIN) {
d = FPMIN;
}
c = b + an / c;
if(Math.abs(c) < FPMIN) {
c = FPMIN;
}
d = 1 / d;
double del = d * c;
fac *= del;
if(Math.abs(del - 1.0) <= NUM_PRECISION) {
break;
}
}
return fac * Math.exp(-x + a * Math.log(x) - logGamma(a));
}
/**
* Generate a random value with the generators parameters.
*
* Along the lines of
*
* - J.H. Ahrens, U. Dieter (1974): Computer methods for sampling from gamma,
* beta, Poisson and binomial distributions, Computing 12, 223-246.
*
* - J.H. Ahrens, U. Dieter (1982): Generating gamma variates by a modified
* rejection technique, Communications of the ACM 25, 47-54.
*
* @param k K parameter
* @param theta Theta parameter
* @param random Random generator
*/
public static double nextRandom(double k, double theta, Random random) {
/* Constants */
final double q1 = 0.0416666664, q2 = 0.0208333723, q3 = 0.0079849875;
final double q4 = 0.0015746717, q5 = -0.0003349403, q6 = 0.0003340332;
final double q7 = 0.0006053049, q8 = -0.0004701849, q9 = 0.0001710320;
final double a1 = 0.333333333, a2 = -0.249999949, a3 = 0.199999867;
final double a4 = -0.166677482, a5 = 0.142873973, a6 = -0.124385581;
final double a7 = 0.110368310, a8 = -0.112750886, a9 = 0.104089866;
final double e1 = 1.000000000, e2 = 0.499999994, e3 = 0.166666848;
final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826;
final double e7 = 0.000247453;
if(k < 1.0) { // Base case, for small k
final double b = 1.0 + 0.36788794412 * k; // Step 1
while(true) {
final double p = b * random.nextDouble();
if(p <= 1.0) { // when gds <= 1
final double gds = Math.exp(Math.log(p) / k);
if(Math.log(random.nextDouble()) <= -gds) {
return (gds / theta);
}
}
else { // when gds > 1
final double gds = -Math.log((b - p) / k);
if(Math.log(random.nextDouble()) <= ((k - 1.0) * Math.log(gds))) {
return (gds / theta);
}
}
}
}
else {
// Step 1. Preparations
final double ss, s, d;
if(k != -1.0) {
ss = k - 0.5;
s = Math.sqrt(ss);
d = 5.656854249 - 12.0 * s;
}
else {
// For k == -1.0:
ss = 0.0;
s = 0.0;
d = 0.0;
}
// Random vector of maximum length 1
final double v1, /* v2, */v12;
{ // Temporary values - candidate
double tv1, tv2, tv12;
do {
tv1 = 2.0 * random.nextDouble() - 1.0;
tv2 = 2.0 * random.nextDouble() - 1.0;
tv12 = tv1 * tv1 + tv2 * tv2;
}
while(tv12 > 1.0);
v1 = tv1;
/* v2 = tv2; */
v12 = tv12;
}
// double b = 0.0, c = 0.0;
// double si = 0.0, q0 = 0.0;
final double b, c, si, q0;
// Simpler accept cases & parameter computation
{
final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12);
final double x = s + 0.5 * t;
final double gds = x * x;
if(t >= 0.0) {
return (gds / theta); // Immediate acceptance
}
// Random uniform
final double un = random.nextDouble();
// Squeeze acceptance
if(d * un <= t * t * t) {
return (gds / theta);
}
if(k != -1.0) { // Step 4. Set-up for hat case
final double r = 1.0 / k;
q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r;
if(k > 3.686) {
if(k > 13.022) {
b = 1.77;
si = 0.75;
c = 0.1515 / s;
}
else {
b = 1.654 + 0.0076 * ss;
si = 1.68 / s + 0.275;
c = 0.062 / s + 0.024;
}
}
else {
b = 0.463 + s - 0.178 * ss;
si = 1.235;
c = 0.195 / s - 0.079 + 0.016 * s;
}
}
else {
// For k == -1.0:
b = 0.0;
c = 0.0;
si = 0.0;
q0 = 0.0;
}
// Compute v and q
if(x > 0.0) {
final double v = t / (s + s);
final double q;
if(Math.abs(v) > 0.25) {
q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v);
}
else {
q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v;
}
// Quotient acceptance:
if(Math.log(1.0 - un) <= q) {
return (gds / theta);
}
}
}
// Double exponential deviate t
while(true) {
double e, u, sign_u, t;
// Retry until t is sufficiently large
do {
e = -Math.log(random.nextDouble());
u = random.nextDouble();
u = u + u - 1.0;
sign_u = (u > 0) ? 1.0 : -1.0;
t = b + (e * si) * sign_u;
}
while(t <= -0.71874483771719);
// New v(t) and q(t)
final double v = t / (s + s);
final double q;
if(Math.abs(v) > 0.25) {
q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v);
}
else {
q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v;
}
if(q <= 0.0) {
continue; // retry
}
// Compute w(t)
final double w;
if(q > 0.5) {
w = Math.exp(q) - 1.0;
}
else {
w = ((((((e7 * q + e6) * q + e5) * q + e4) * q + e3) * q + e2) * q + e1) * q;
}
// Hat acceptance
if(c * u * sign_u <= w * Math.exp(e - 0.5 * t * t)) {
final double x = s + 0.5 * t;
return (x * x / theta);
}
}
}
}
/**
* Approximate probit for chi squared distribution
*
* Based on first half of algorithm AS 91
*
* Reference:
*
* Algorithm AS 91: The percentage points of the $\chi$ 2 distribution
* D.J. Best, D. E. Roberts
* Journal of the Royal Statistical Society. Series C (Applied Statistics)
*
*
* @param p Probit value
* @param nu Shape parameter for Chi, nu = 2 * k
* @param g log(nu)
* @return Probit for chi squared
*/
@Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
protected static double chisquaredProbitApproximation(final double p, double nu, double g) {
final double EPS1 = 1e-2; // Approximation quality
// Sanity checks
if(Double.isNaN(p) || Double.isNaN(nu)) {
return Double.NaN;
}
// Range check
if(p <= 0) {
return 0;
}
if(p >= 1) {
return Double.POSITIVE_INFINITY;
}
// Invalid parameters
if(nu <= 0) {
return Double.NaN;
}
// Shape of gamma distribution, "XX" in AS 91
final double k = 0.5 * nu;
// For small chi squared values - AS 91
final double logp = Math.log(p);
if(nu < -1.24 * logp) {
// FIXME: implement and use logGammap1 instead - more stable?
//
// final double lgam1pa = (alpha < 0.5) ? logGammap1(alpha) :
// (Math.log(alpha) + g);
// return Math.exp((lgam1pa + logp) / alpha + MathUtil.LOG2);
// This is literal AS 91, above is the GNU R variant.
return Math.pow(p * k * Math.exp(g + k * MathUtil.LOG2), 1 / k);
}
else if(nu > 0.32) {
// Wilson and Hilferty estimate: - AS 91 at 3
final double x = NormalDistribution.quantile(p, 0, 1);
final double p1 = 2. / (9. * nu);
double ch = nu * Math.pow(x * Math.sqrt(p1) + 1 - p1, 3);
// Better approximation for p tending to 1:
if(ch > 2.2 * nu + 6) {
ch = -2 * (Math.log1p(-p) - (k - 1) * Math.log(0.5 * ch) + g);
}
return ch;
}
else {
// nu <= 0.32, AS 91 at 1
final double C7 = 4.67, C8 = 6.66, C9 = 6.73, C10 = 13.32;
final double ag = Math.log1p(-p) + g + (k - 1) * MathUtil.LOG2;
double ch = 0.4;
while(true) {
final double p1 = 1 + ch * (C7 + ch);
final double p2 = ch * (C9 + ch * (C8 + ch));
final double t = -0.5 + (C7 + 2 * ch) / p1 - (C9 + ch * (C10 + 3 * ch)) / p2;
final double delta = (1 - Math.exp(ag + 0.5 * ch) * p2 / p1) / t;
ch -= delta;
if(Math.abs(delta) > EPS1 * Math.abs(ch)) {
return ch;
}
}
}
}
/**
* Compute probit (inverse cdf) for Gamma distributions.
*
* Based on algorithm AS 91:
*
* Reference:
*
* Algorithm AS 91: The percentage points of the $\chi$^2 distribution
* D.J. Best, D. E. Roberts
* Journal of the Royal Statistical Society. Series C (Applied Statistics)
*
*
* @param p Probability
* @param k k, alpha aka. "shape" parameter
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
* @return Probit for Gamma distribution
*/
@Reference(title = "Algorithm AS 91: The percentage points of the $\\chi$^2 distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
public static double quantile(double p, double k, double theta) {
final double EPS2 = 5e-7; // final precision of AS 91
final int MAXIT = 1000;
// Avoid degenerates
if(Double.isNaN(p) || Double.isNaN(k) || Double.isNaN(theta)) {
return Double.NaN;
}
// Range check
if(p <= 0) {
return 0;
}
if(p >= 1) {
return Double.POSITIVE_INFINITY;
}
// Shape parameter check
if(k < 0 || theta <= 0) {
return Double.NaN;
}
// Corner case - all at 0
if(k == 0) {
return 0.;
}
int max_newton_iterations = 1;
// For small values, ensure some refinement iterations
if(k < 1e-10) {
max_newton_iterations = 7;
}
final double g = logGamma(k); // == logGamma(v/2)
// Phase I, an initial rough approximation
// First half of AS 91
double ch = chisquaredProbitApproximation(p, 2 * k, g);
// Second hald of AS 91 follows:
// Refine ChiSquared approximation
chisq: {
if(Double.isInfinite(ch)) {
// Cannot refine infinity
max_newton_iterations = 0;
break chisq;
}
if(ch < EPS2) {
// Do not iterate, but refine with newton method
max_newton_iterations = 20;
break chisq;
}
if(p > 1 - 1e-14 || p < 1e-100) {
// Not in appropriate value range for AS 91
max_newton_iterations = 20;
break chisq;
}
// Phase II: Iteration
final double c = k - 1;
final double ch0 = ch; // backup initial approximation
for(int i = 1; i <= MAXIT; i++) {
final double q = ch; // previous approximation
final double p1 = 0.5 * ch;
final double p2 = p - regularizedGammaP(k, p1);
if(Double.isInfinite(p2) || ch <= 0) {
ch = ch0;
max_newton_iterations = 27;
break chisq;
}
{ // Taylor series of AS 91: iteration via "goto 4"
final double t = p2 * Math.exp(k * MathUtil.LOG2 + g + p1 - c * Math.log(ch));
final double b = t / ch;
final double a = 0.5 * t - b * c;
final double s1 = (210. + a * (140. + a * (105. + a * (84. + a * (70. + 60. * a))))) / 420.;
final double s2 = (420. + a * (735. + a * (966. + a * (1141. + 1278 * a)))) / 2520.;
final double s3 = (210. + a * (462. + a * (707. + 932. * a))) / 2520.;
final double s4 = (252. + a * (672. + 1182. * a) + c * (294. + a * (889. + 1740. * a))) / 5040.;
final double s5 = (84. + 2264. * a + c * (1175. + 606. * a)) / 2520.;
final double s6 = (120. + c * (346. + 127. * c)) / 5040.;
ch += t * (1 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6))))));
}
if(Math.abs(q - ch) < EPS2 * ch) {
break chisq;
}
// Divergence treatment, from GNU R
if(Math.abs(q - ch) > 0.1 * Math.abs(ch)) {
ch = ((ch < q) ? 0.9 : 1.1) * q;
}
}
LoggingUtil.warning("No convergence in AS 91 Gamma probit.");
// no convergence in MAXIT iterations -- but we add Newton now...
}
double x = 0.5 * ch / theta;
if(max_newton_iterations > 0) {
// Refine result using final Newton steps.
// TODO: add unit tests that show an improvement! Maybe in logscale only?
x = gammaQuantileNewtonRefinement(Math.log(p), k, theta, max_newton_iterations, x);
}
return x;
}
/**
* Refinement of ChiSquared probit using Newton iterations.
*
* A trick used by GNU R to improve precision.
*
* @param logpt Target value of log p
* @param k Alpha
* @param theta Theta = 1 / Beta
* @param maxit Maximum number of iterations to do
* @param x Initial estimate
* @return Refined value
*/
protected static double gammaQuantileNewtonRefinement(final double logpt, final double k, final double theta, final int maxit, double x) {
final double EPS_N = 1e-15; // Precision threshold
// 0 is not possible, try MIN_NORMAL instead
if(x <= 0) {
x = Double.MIN_NORMAL;
}
// Current estimation
double logpc = logcdf(x, k, theta);
if(x == Double.MIN_NORMAL && logpc > logpt * (1. + 1e-7)) {
return 0.;
}
if(logpc == Double.NEGATIVE_INFINITY) {
return 0.;
}
// Refine by newton iterations
for(int i = 0; i < maxit; i++) {
// Error of current approximation
final double logpe = logpc - logpt;
if(Math.abs(logpe) < Math.abs(EPS_N * logpt)) {
break;
}
// Step size is controlled by PDF:
final double g = logpdf(x, k, theta);
if(g == Double.NEGATIVE_INFINITY) {
break;
}
final double newx = x - logpe * Math.exp(logpc - g);
// New estimate:
logpc = logcdf(newx, k, theta);
if(Math.abs(logpc - logpt) > Math.abs(logpe) || (i > 0 && Math.abs(logpc - logpt) == Math.abs(logpe))) {
// no further improvement
break;
}
x = newx;
}
return x;
}
/**
* Compute the Psi / Digamma function
*
* Reference:
*
* J. M. Bernando
* Algorithm AS 103: Psi (Digamma) Function
* Statistical Algorithms
*
*
* TODO: is there a more accurate version maybe in R?
*
* @param x Position
* @return digamma value
*/
@Reference(authors = "J. M. Bernando", title = "Algorithm AS 103: Psi (Digamma) Function", booktitle = "Statistical Algorithms")
public static double digamma(double x) {
if(!(x > 0)) {
return Double.NaN;
}
// Method of equation 5:
if(x <= 1e-5) {
return -EULERS_CONST - 1. / x;
}
// Method of equation 4:
else if(x > 49.) {
final double ix2 = 1. / (x * x);
// Partial series expansion
return Math.log(x) - 0.5 / x - ix2 * ((1.0 / 12.) + ix2 * (1.0 / 120. - ix2 / 252.));
// + O(x^8) error
}
else {
// Stirling expansion
return digamma(x + 1.) - 1. / x;
}
}
/**
* Compute the Trigamma function. Based on digamma.
*
* TODO: is there a more accurate version maybe in R?
*
* @param x Position
* @return trigamma value
*/
public static double trigamma(double x) {
if(!(x > 0)) {
return Double.NaN;
}
// Method of equation 5:
if(x <= 1e-5) {
return 1. / (x * x);
}
// Method of equation 4:
else if(x > 49.) {
final double ix2 = 1. / (x * x);
// Partial series expansion
return 1 / x - ix2 / 2. + ix2 / x * (1.0 / 6. - ix2 * (1.0 / 30. + ix2 / 42.));
// + O(x^8) error
}
else {
// Stirling expansion
return trigamma(x + 1.) - 1. / (x * x);
}
}
/**
* Mean least squares estimation of Gamma distribution to a set of
* observations.
*
* @param data Data
* @return Assumed distribution
*/
public static GammaDistribution estimate(double[] data) {
return estimate(data, data.length);
}
/**
* Mean least squares estimation of Gamma distribution to a set of
* observations.
*
* Reference:
*
* Maximum likelihood estimation of the parameters of the gamma distribution
* and their bias
* S. C. Choi, R. Wette
* in: Technometrics
*
*
* @param data Data
* @param len Length of array
* @return Assumed distribution
*/
@Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892")
public static GammaDistribution estimate(double[] data, int len) {
double meanx = 0, meanlogx = 0;
for(int i = 0; i < len; i++) {
final double logx = Math.log(data[i]);
final double deltax = data[i] - meanx;
final double deltalogx = logx - meanlogx;
meanx += deltax / (i + 1.);
meanlogx += deltalogx / (i + 1.);
}
// Initial approximation
final double logmeanx = Math.log(meanx);
final double diff = logmeanx - meanlogx;
double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff);
// Refine via newton iteration, based on Choi and Wette equation
while(true) {
double kdelta = (Math.log(k) - digamma(k) - diff) / (1 / k - trigamma(k));
if(Math.abs(kdelta) < 1E-8 || Double.isNaN(kdelta)) {
break;
}
k += kdelta;
}
// Estimate theta:
final double theta = k / meanx;
return new GammaDistribution(k, theta);
}
}