summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/math/statistics
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/statistics')
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/QuickSelect.java231
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java92
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java72
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java61
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java63
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java470
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java337
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java2
20 files changed, 1278 insertions, 258 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java
index 0b48a69e..ed9d8d58 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java
index 7adf8ee1..744a9108 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
index 398e6927..3bb0e1f6 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java
index 4c9100fd..29718fcb 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java
index f28986f0..231b2071 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
index 90f80135..0e674146 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -139,14 +139,10 @@ public class MultipleLinearRegression {
@Override
public String toString() {
StringBuffer msg = new StringBuffer();
- msg.append("\nx = ");
- msg.append(FormatUtil.format(x, 9, 4));
- msg.append("\ny = ");
- msg.append(FormatUtil.format(y, 9, 4));
- msg.append("\nb = ");
- msg.append(FormatUtil.format(b, 9, 4));
- msg.append("\ne = ");
- msg.append(FormatUtil.format(e, 9, 4));
+ msg.append("x = ").append(FormatUtil.format(x, 9, 4));
+ msg.append("\ny = ").append(FormatUtil.format(y, 9, 4));
+ msg.append("\nb = ").append(FormatUtil.format(b, 9, 4));
+ msg.append("\ne = ").append(FormatUtil.format(e, 9, 4));
msg.append("error variance = ").append(FormatUtil.format(variance, 4));
return msg.toString();
}
@@ -203,7 +199,7 @@ public class MultipleLinearRegression {
* @return the estimation of y
*/
public double estimateY(Matrix x) {
- return x.times(b).get(0, 0);
+ return x.times(b).get(0);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java
index 1b785f75..b1bfe31b 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -62,7 +62,7 @@ public class PolynomialRegression extends MultipleLinearRegression {
}
private static Matrix xMatrix(Vector x, int p) {
- int n = x.getRowDimensionality();
+ int n = x.getDimensionality();
Matrix result = new Matrix(n, p + 1);
for(int i = 0; i < n; i++) {
@@ -79,7 +79,7 @@ public class PolynomialRegression extends MultipleLinearRegression {
* @return the adapted coefficient of determination
*/
public double adaptedCoefficientOfDetermination() {
- int n = getEstimatedResiduals().getRowDimensionality();
+ int n = getEstimatedResiduals().getDimensionality();
return 1.0 - ((n - 1.0) / (n * 1.0 - p)) * (1 - coefficientOfDetermination());
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/math/statistics/QuickSelect.java
deleted file mode 100644
index efacd395..00000000
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/QuickSelect.java
+++ /dev/null
@@ -1,231 +0,0 @@
-package de.lmu.ifi.dbs.elki.math.statistics;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2011
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-/**
- * QuickSelect computes ("selects") the element at a given rank and can be used
- * to compute Medians and arbitrary quantiles by computing the appropriate rank.
- *
- * This algorithm is essentially an incomplete QuickSort that only descends into
- * that part of the data that we are interested in, and also attributed to
- * Charles Antony Richard Hoare
- *
- * @author Erich Schubert
- */
-public class QuickSelect {
- /**
- * For small arrays, use a simpler method:
- */
- private static final int SMALL = 10;
-
- /**
- * QuickSelect is essentially quicksort, except that we only "sort" that half
- * of the array that we are interested in.
- *
- * Note: the array is <b>modified</b> by this.
- *
- * @param data Data to process
- * @param rank Rank position that we are interested in (integer!)
- * @return Value at the given rank
- */
- public static double quickSelect(double[] data, int rank) {
- quickSelect(data, 0, data.length - 1, rank);
- return data[rank];
- }
-
- /**
- * Compute the median of an array efficiently using the QuickSelect method.
- *
- * Note: the array is <b>modified</b> by this.
- *
- * @param data Data to process
- * @return Median value
- */
- public static double median(double[] data) {
- return median(data, 0, data.length - 1);
- }
-
- /**
- * Compute the median of an array efficiently using the QuickSelect method.
- *
- * Note: the array is <b>modified</b> by this.
- *
- * @param data Data to process
- * @param begin Begin of valid values
- * @param end End of valid values (inclusive!)
- * @return Median value
- */
- public static double median(double[] data, int begin, int end) {
- final int length = (end + 1) - begin;
- assert (length > 0);
- // Integer division is "floor" since we are non-negative.
- final int left = begin + (length - 1) / 2;
- quickSelect(data, begin, end, left);
- if(length % 2 == 1) {
- return data[left];
- }
- else {
- quickSelect(data, begin, end, left + 1);
- return data[left] + (data[left + 1] - data[left]) / 2;
- }
- }
-
- /**
- * Compute the median of an array efficiently using the QuickSelect method.
- *
- * Note: the array is <b>modified</b> by this.
- *
- * @param data Data to process
- * @param quant Quantile to compute
- * @return Value at quantile
- */
- public static double quantile(double[] data, double quant) {
- return quantile(data, 0, data.length - 1, quant);
- }
-
- /**
- * Compute the median of an array efficiently using the QuickSelect method.
- *
- * Note: the array is <b>modified</b> by this.
- *
- * @param data Data to process
- * @param begin Begin of valid values
- * @param end End of valid values (inclusive!)
- * @param quant Quantile to compute
- * @return Value at quantile
- */
- public static double quantile(double[] data, int begin, int end, double quant) {
- final int length = (end + 1) - begin;
- assert (length > 0) : "Quantile on empty set?";
- // Integer division is "floor" since we are non-negative.
- final double dleft = begin + (length - 1) * quant;
- final int ileft = (int) Math.floor(dleft);
- final double err = dleft - ileft;
-
- quickSelect(data, begin, end, ileft);
- if(err <= Double.MIN_NORMAL) {
- return data[ileft];
- }
- else {
- quickSelect(data, begin, end, ileft + 1);
- // Mix:
- double mix = data[ileft] + (data[ileft + 1] - data[ileft]) * err;
- return mix;
- }
- }
-
- /**
- * QuickSelect is essentially quicksort, except that we only "sort" that half
- * of the array that we are interested in.
- *
- * @param data Data to process
- * @param start Interval start
- * @param end Interval end (inclusive)
- * @param rank rank position we are interested in (starting at 0)
- */
- public static void quickSelect(double[] data, int start, int end, int rank) {
- // Optimization for small arrays
- // This also ensures a minimum size below
- if(start + SMALL > end) {
- insertionSort(data, start, end);
- return;
- }
-
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) / 2;
- if(data[start] > data[middle]) {
- swap(data, start, middle);
- }
- if(data[start] > data[end]) {
- swap(data, start, end);
- }
- if(data[middle] > data[end]) {
- swap(data, middle, end);
- }
- // TODO: use more candidates for larger arrays?
-
- final double pivot = data[middle];
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- swap(data, middle, end - 1);
-
- // Begin partitioning
- int i = start + 1, j = end - 2;
- // This is classic quicksort stuff
- while(true) {
- while(data[i] <= pivot && i <= j) {
- i++;
- }
- while(data[j] >= pivot && j >= i) {
- j--;
- }
- if(i >= j) {
- break;
- }
- swap(data, i, j);
- }
-
- // Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 1);
-
- // In contrast to quicksort, we only need to recurse into the half we are
- // interested in.
- if(rank < i) {
- quickSelect(data, start, i - 1, rank);
- }
- else if(rank > i) {
- quickSelect(data, i + 1, end, rank);
- }
- }
-
- /**
- * Sort a small array using repetitive insertion sort.
- *
- * @param data Data to sort
- * @param start Interval start
- * @param end Interval end
- */
- private static void insertionSort(double[] data, int start, int end) {
- for(int i = start + 1; i <= end; i++) {
- for(int j = i; j > start && data[j - 1] > data[j]; j--) {
- swap(data, j, j - 1);
- }
- }
- }
-
- /**
- * The usual swap method.
- *
- * @param data Array
- * @param a First index
- * @param b Second index
- */
- private static final void swap(double[] data, int a, int b) {
- double tmp = data[a];
- data[a] = data[b];
- data[b] = tmp;
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java
index e9ad4a00..a8f45f9d 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,10 @@ package de.lmu.ifi.dbs.elki.math.statistics;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.HashMap;
-import java.util.Map;
+import gnu.trove.map.TDoubleDoubleMap;
+import gnu.trove.map.TIntObjectMap;
+import gnu.trove.map.hash.TDoubleDoubleHashMap;
+import gnu.trove.map.hash.TIntObjectHashMap;
/**
* Tabelarizes the values for student distribution.
@@ -125,7 +127,7 @@ public class StudentDistribution {
/**
* Holds the t-values.
*/
- private static Map<Integer, Map<Double, Double>> tValues = new HashMap<Integer, Map<Double, Double>>();
+ private static TIntObjectMap<TDoubleDoubleMap> tValues = new TIntObjectHashMap<TDoubleDoubleMap>();
static {
put(31, new double[] { 0.2533, 0.8416, 1.2816, 1.6449, 1.96, 2.3263, 2.5758, 3.0903, 3.2906 });
@@ -142,7 +144,7 @@ public class StudentDistribution {
if(n > 30) {
n = 31;
}
- Map<Double, Double> map = tValues.get(n);
+ TDoubleDoubleMap map = tValues.get(n);
if(map == null) {
throw new IllegalArgumentException("t-values for n=" + n + " not yet tabularized!");
}
@@ -162,7 +164,7 @@ public class StudentDistribution {
* @param values the t-values
*/
private static void put(int n, double[] values) {
- Map<Double, Double> map = new HashMap<Double, Double>();
+ TDoubleDoubleMap map = new TDoubleDoubleHashMap();
map.put(_6000, values[0]);
map.put(_8000, values[1]);
map.put(_9000, values[2]);
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java
index 7beb17c3..e3c23b2a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java
index 2f000736..8d85528f 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
new file mode 100644
index 00000000..c561c4cd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
@@ -0,0 +1,92 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Chi distribution.
+ *
+ * @author Erich Schubert
+ */
+public class ChiDistribution implements Distribution {
+ /**
+ * Degrees of freedom. Usually integer.
+ */
+ private double dof;
+
+ /**
+ * Chi squared distribution (for random generation)
+ */
+ private ChiSquaredDistribution chisq;
+
+ /**
+ * Constructor.
+ *
+ * @param dof Degrees of freedom. Usually integer.
+ */
+ public ChiDistribution(double dof) {
+ super();
+ this.dof = dof;
+ this.chisq = new ChiSquaredDistribution(dof);
+ }
+
+ @Override
+ public double nextRandom() {
+ return Math.sqrt(chisq.nextRandom());
+ }
+
+ @Override
+ public double pdf(double val) {
+ return pdf(val, dof);
+ }
+
+ /**
+ * PDF function
+ *
+ * @param val Value
+ * @param dof Degrees of freedom
+ * @return Pdf value
+ */
+ public static double pdf(double val, double dof) {
+ if(val < 0) {
+ return 0.0;
+ }
+ return Math.sqrt(ChiSquaredDistribution.pdf(val, dof));
+ }
+
+ @Override
+ public double cdf(double val) {
+ return cdf(val, dof);
+ }
+
+ /**
+ * Cumulative density function.
+ *
+ * @param val Value
+ * @param dof Degrees of freedom.
+ * @return CDF value
+ */
+ public static double cdf(double val, double dof) {
+ return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2);
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
new file mode 100644
index 00000000..0ab39c78
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
@@ -0,0 +1,72 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Chi-Squared distribution (a specialization of the Gamma distribution).
+ *
+ * @author Erich Schubert
+ */
+public class ChiSquaredDistribution extends GammaDistribution {
+ /**
+ * Constructor.
+ *
+ * @param dof Degrees of freedom.
+ */
+ public ChiSquaredDistribution(double dof) {
+ super(.5 * dof, 2.0);
+ }
+
+ /**
+ * The CDF, static version.
+ *
+ * @param val Value
+ * @param dof Degrees of freedom.
+ * @return cdf value
+ */
+ public static double cdf(double val, double dof) {
+ return regularizedGammaP(.5 * dof, .5 * val);
+ }
+
+ /**
+ * Chi-Squared distribution PDF (with 0.0 for x &lt; 0)
+ *
+ * @param x query value
+ * @param dof Degrees of freedom.
+ * @return probability density
+ */
+ public static double pdf(double x, double dof) {
+ if(x <= 0) {
+ return 0.0;
+ }
+ if(x == 0) {
+ return 0.0;
+ }
+ final double k = dof / 2;
+ if(k == 1.0) {
+ return Math.exp(-x * 2.0) * 2.0;
+ }
+ return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
new file mode 100644
index 00000000..1f36dd4a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
@@ -0,0 +1,61 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Pseudo distribution, that has a unique constant value.
+ *
+ * @author Erich Schubert
+ */
+public class ConstantDistribution implements Distribution {
+ /**
+ * The constant
+ */
+ final double c;
+
+ /**
+ * Constructor.
+ *
+ * @param c Constant
+ */
+ public ConstantDistribution(double c) {
+ super();
+ this.c = c;
+ }
+
+ @Override
+ public double nextRandom() {
+ return c;
+ }
+
+ @Override
+ public double pdf(double val) {
+ return (val == c) ? 1 : 0;
+ }
+
+ @Override
+ public double cdf(double val) {
+ return (val >= c) ? 1.0 : 0.0;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
new file mode 100644
index 00000000..290e6434
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
@@ -0,0 +1,63 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Interface for a simple distribution generator with a PDF, i.e. it can also
+ * compute a density
+ *
+ * @author Erich Schubert
+ */
+public interface Distribution {
+ /**
+ * Generate a new random value
+ *
+ * @return new random value
+ */
+ public double nextRandom();
+
+ /**
+ * Return the density of an existing value
+ *
+ * @param val existing value
+ * @return distribution density
+ */
+ public double pdf(double val);
+
+ /**
+ * Return the cumulative density function at the given value.
+ *
+ * @param val existing value
+ * @return cumulative density
+ */
+ public double cdf(double val);
+
+ /**
+ * Describe the distribution
+ *
+ * @return description
+ */
+ @Override
+ public String toString();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
new file mode 100644
index 00000000..6830f25a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
@@ -0,0 +1,470 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Gamma Distribution, with random generation and density functions.
+ *
+ * @author Erich Schubert
+ */
+public class GammaDistribution implements Distribution {
+ /**
+ * LANCZOS-Coefficients for Gamma approximation.
+ *
+ * These are said to have higher precision than those in "Numerical Recipes".
+ * They probably come from
+ *
+ * Paul Godfrey: http://my.fit.edu/~gabdo/gamma.txt
+ */
+ static final double[] LANCZOS = { 0.99999999999999709182, 57.156235665862923517, -59.597960355475491248, 14.136097974741747174, -0.49191381609762019978, .33994649984811888699e-4, .46523628927048575665e-4, -.98374475304879564677e-4, .15808870322491248884e-3, -.21026444172410488319e-3, .21743961811521264320e-3, -.16431810653676389022e-3, .84418223983852743293e-4, -.26190838401581408670e-4, .36899182659531622704e-5, };
+
+ /**
+ * Numerical precision to use
+ */
+ static final double NUM_PRECISION = 1E-15;
+
+ /**
+ * Alpha == k
+ */
+ private final double k;
+
+ /**
+ * Theta == 1 / Beta
+ */
+ private final double theta;
+
+ /**
+ * The random generator.
+ */
+ private Random random;
+
+ /**
+ * Constructor for Gamma distribution.
+ *
+ * @param k k, alpha aka. "shape" parameter
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @param random Random generator
+ */
+ public GammaDistribution(double k, double theta, Random random) {
+ super();
+ if(k <= 0.0 || theta <= 0.0) {
+ throw new IllegalArgumentException("Invalid parameters for Gamma distribution.");
+ }
+
+ this.k = k;
+ this.theta = theta;
+ this.random = random;
+ }
+
+ /**
+ * Constructor for Gamma distribution.
+ *
+ * @param k k, alpha aka. "shape" parameter
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ */
+ public GammaDistribution(double k, double theta) {
+ this(k, theta, new Random());
+ }
+
+ @Override
+ public double pdf(double val) {
+ return pdf(val, k, theta);
+ }
+
+ @Override
+ public double cdf(double val) {
+ return cdf(val, k, theta);
+ }
+
+ @Override
+ public double nextRandom() {
+ return nextRandom(k, theta, random);
+ }
+
+ /**
+ * Simple toString explaining the distribution parameters.
+ *
+ * Used in producing a model description.
+ */
+ @Override
+ public String toString() {
+ return "Gamma Distribution (k=" + k + ", theta=" + theta + ")";
+ }
+
+ /**
+ * @return the value of k
+ */
+ public double getK() {
+ return k;
+ }
+
+ /**
+ * @return the standard deviation
+ */
+ public double getTheta() {
+ return theta;
+ }
+
+ /**
+ * The CDF, static version.
+ *
+ * @param val Value
+ * @param k Shape k
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @return cdf value
+ */
+ public static double cdf(double val, double k, double theta) {
+ return regularizedGammaP(k, val / theta);
+ }
+
+ /**
+ * Gamma distribution PDF (with 0.0 for x &lt; 0)
+ *
+ * @param x query value
+ * @param k Alpha
+ * @param theta Theta = 1 / Beta
+ * @return probability density
+ */
+ public static double pdf(double x, double k, double theta) {
+ if(x < 0) {
+ return 0.0;
+ }
+ if(x == 0) {
+ if(k == 1.0) {
+ return theta;
+ }
+ else {
+ return 0.0;
+ }
+ }
+ if(k == 1.0) {
+ return Math.exp(-x * theta) * theta;
+ }
+
+ return Math.exp((k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k)) * theta;
+ }
+
+ /**
+ * Compute logGamma.
+ *
+ * Based loosely on "Numerical Recpies" and the work of Paul Godfrey at
+ * http://my.fit.edu/~gabdo/gamma.txt
+ *
+ * TODO: find out which approximation really is the best...
+ *
+ * @param x Parameter x
+ * @return log(&#915;(x))
+ */
+ public static double logGamma(final double x) {
+ if(Double.isNaN(x) || (x <= 0.0)) {
+ return Double.NaN;
+ }
+ double g = 607.0 / 128.0;
+ double tmp = x + g + .5;
+ tmp = (x + 0.5) * Math.log(tmp) - tmp;
+ double ser = LANCZOS[0];
+ for(int i = LANCZOS.length - 1; i > 0; --i) {
+ ser += LANCZOS[i] / (x + i);
+ }
+ return tmp + Math.log(MathUtil.SQRTTWOPI * ser / x);
+ }
+
+ /**
+ * Returns the regularized gamma function P(a, x).
+ *
+ * Includes the quadrature way of computing.
+ *
+ * TODO: find "the" most accurate version of this. We seem to agree with
+ * others for the first 10+ digits, but diverge a bit later than that.
+ *
+ * @param a Parameter a
+ * @param x Parameter x
+ * @return Gamma value
+ */
+ public static double regularizedGammaP(final double a, final double x) {
+ // Special cases
+ if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
+ return Double.NaN;
+ }
+ if(x == 0.0) {
+ return 0.0;
+ }
+ if(x >= a + 1) {
+ // Expected to converge faster
+ return 1.0 - regularizedGammaQ(a, x);
+ }
+ // Loosely following "Numerical Recipes"
+ double del = 1.0 / a;
+ double sum = del;
+ for(int n = 1; n < Integer.MAX_VALUE; n++) {
+ // compute next element in the series
+ del *= x / (a + n);
+ sum = sum + del;
+ if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) {
+ break;
+ }
+ }
+ if(Double.isInfinite(sum)) {
+ return 1.0;
+ }
+ return Math.exp(-x + (a * Math.log(x)) - logGamma(a)) * sum;
+ }
+
+ /**
+ * Returns the regularized gamma function Q(a, x) = 1 - P(a, x).
+ *
+ * Includes the continued fraction way of computing, based loosely on the book
+ * "Numerical Recipes"; but probably not with the exactly same precision,
+ * since we reimplemented this in our coding style, not literally.
+ *
+ * TODO: find "the" most accurate version of this. We seem to agree with
+ * others for the first 10+ digits, but diverge a bit later than that.
+ *
+ * @param a parameter a
+ * @param x parameter x
+ * @return Result
+ */
+ public static double regularizedGammaQ(final double a, final double x) {
+ if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
+ return Double.NaN;
+ }
+ if(x == 0.0) {
+ return 1.0;
+ }
+ if(x < a + 1.0) {
+ // Expected to converge faster
+ return 1.0 - regularizedGammaP(a, x);
+ }
+ // Compute using continued fraction approach.
+ final double FPMIN = Double.MIN_VALUE / NUM_PRECISION;
+ double b = x + 1 - a;
+ double c = 1.0 / FPMIN;
+ double d = 1.0 / b;
+ double fac = d;
+ for(int i = 1; i < Integer.MAX_VALUE; i++) {
+ double an = i * (a - i);
+ b += 2;
+ d = an * d + b;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ c = b + an / c;
+ if(Math.abs(c) < FPMIN) {
+ c = FPMIN;
+ }
+ d = 1 / d;
+ double del = d * c;
+ fac *= del;
+ if(Math.abs(del - 1.0) <= NUM_PRECISION) {
+ break;
+ }
+ }
+ return fac * Math.exp(-x + a * Math.log(x) - logGamma(a));
+ }
+
+ /**
+ * Generate a random value with the generators parameters.
+ *
+ * Along the lines of
+ *
+ * - J.H. Ahrens, U. Dieter (1974): Computer methods for sampling from gamma,
+ * beta, Poisson and binomial distributions, Computing 12, 223-246.
+ *
+ * - J.H. Ahrens, U. Dieter (1982): Generating gamma variates by a modified
+ * rejection technique, Communications of the ACM 25, 47-54.
+ *
+ * @param k K parameter
+ * @param theta Theta parameter
+ * @param random Random generator
+ */
+ public static double nextRandom(double k, double theta, Random random) {
+ /* Constants */
+ final double q1 = 0.0416666664, q2 = 0.0208333723, q3 = 0.0079849875;
+ final double q4 = 0.0015746717, q5 = -0.0003349403, q6 = 0.0003340332;
+ final double q7 = 0.0006053049, q8 = -0.0004701849, q9 = 0.0001710320;
+ final double a1 = 0.333333333, a2 = -0.249999949, a3 = 0.199999867;
+ final double a4 = -0.166677482, a5 = 0.142873973, a6 = -0.124385581;
+ final double a7 = 0.110368310, a8 = -0.112750886, a9 = 0.104089866;
+ final double e1 = 1.000000000, e2 = 0.499999994, e3 = 0.166666848;
+ final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826;
+ final double e7 = 0.000247453;
+
+ if(k < 1.0) { // Base case, for small k
+ final double b = 1.0 + 0.36788794412 * k; // Step 1
+ while(true) {
+ final double p = b * random.nextDouble();
+ if(p <= 1.0) { // when gds <= 1
+ final double gds = Math.exp(Math.log(p) / k);
+ if(Math.log(random.nextDouble()) <= -gds) {
+ return (gds / theta);
+ }
+ }
+ else { // when gds > 1
+ final double gds = -Math.log((b - p) / k);
+ if(Math.log(random.nextDouble()) <= ((k - 1.0) * Math.log(gds))) {
+ return (gds / theta);
+ }
+ }
+ }
+ }
+ else {
+ // Step 1. Preparations
+ final double ss, s, d;
+ if(k != -1.0) {
+ ss = k - 0.5;
+ s = Math.sqrt(ss);
+ d = 5.656854249 - 12.0 * s;
+ }
+ else {
+ // For k == -1.0:
+ ss = 0.0;
+ s = 0.0;
+ d = 0.0;
+ }
+ // Random vector of maximum length 1
+ final double v1, /* v2, */v12;
+ { // Temporary values - candidate
+ double tv1, tv2, tv12;
+ do {
+ tv1 = 2.0 * random.nextDouble() - 1.0;
+ tv2 = 2.0 * random.nextDouble() - 1.0;
+ tv12 = tv1 * tv1 + tv2 * tv2;
+ }
+ while(tv12 > 1.0);
+ v1 = tv1;
+ /* v2 = tv2; */
+ v12 = tv12;
+ }
+
+ // double b = 0.0, c = 0.0;
+ // double si = 0.0, q0 = 0.0;
+ final double b, c, si, q0;
+
+ // Simpler accept cases & parameter computation
+ {
+ final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12);
+ final double x = s + 0.5 * t;
+ final double gds = x * x;
+ if(t >= 0.0) {
+ return (gds / theta); // Immediate acceptance
+ }
+
+ // Random uniform
+ final double un = random.nextDouble();
+ // Squeeze acceptance
+ if(d * un <= t * t * t) {
+ return (gds / theta);
+ }
+
+ if(k != -1.0) { // Step 4. Set-up for hat case
+ final double r = 1.0 / k;
+ q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r;
+ if(k > 3.686) {
+ if(k > 13.022) {
+ b = 1.77;
+ si = 0.75;
+ c = 0.1515 / s;
+ }
+ else {
+ b = 1.654 + 0.0076 * ss;
+ si = 1.68 / s + 0.275;
+ c = 0.062 / s + 0.024;
+ }
+ }
+ else {
+ b = 0.463 + s - 0.178 * ss;
+ si = 1.235;
+ c = 0.195 / s - 0.079 + 0.016 * s;
+ }
+ }
+ else {
+ // For k == -1.0:
+ b = 0.0;
+ c = 0.0;
+ si = 0.0;
+ q0 = 0.0;
+ }
+ // Compute v and q
+ if(x > 0.0) {
+ final double v = t / (s + s);
+ final double q;
+ if(Math.abs(v) > 0.25) {
+ q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v);
+ }
+ else {
+ q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v;
+ }
+ // Quotient acceptance:
+ if(Math.log(1.0 - un) <= q) {
+ return (gds / theta);
+ }
+ }
+ }
+
+ // Double exponential deviate t
+ while(true) {
+ double e, u, sign_u, t;
+ // Retry until t is sufficiently large
+ do {
+ e = -Math.log(random.nextDouble());
+ u = random.nextDouble();
+ u = u + u - 1.0;
+ sign_u = (u > 0) ? 1.0 : -1.0;
+ t = b + (e * si) * sign_u;
+ }
+ while(t <= -0.71874483771719);
+
+ // New v(t) and q(t)
+ final double v = t / (s + s);
+ final double q;
+ if(Math.abs(v) > 0.25) {
+ q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v);
+ }
+ else {
+ q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v;
+ }
+ if(q <= 0.0) {
+ continue; // retry
+ }
+ // Compute w(t)
+ final double w;
+ if(q > 0.5) {
+ w = Math.exp(q) - 1.0;
+ }
+ else {
+ w = ((((((e7 * q + e6) * q + e5) * q + e4) * q + e3) * q + e2) * q + e1) * q;
+ }
+ // Hat acceptance
+ if(c * u * sign_u <= w * Math.exp(e - 0.5 * t * t)) {
+ final double x = s + 0.5 * t;
+ return (x * x / theta);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
new file mode 100644
index 00000000..919cc2e3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
@@ -0,0 +1,337 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Gaussian distribution aka normal distribution
+ *
+ * @author Erich Schubert
+ */
+public class NormalDistribution implements Distribution {
+ /**
+ * Coefficients for erf approximation.
+ *
+ * Loosely based on http://www.netlib.org/specfun/erf
+ */
+ static final double ERFAPP_A[] = { 1.85777706184603153e-1, 3.16112374387056560e+0, 1.13864154151050156E+2, 3.77485237685302021e+2, 3.20937758913846947e+3 };
+
+ /**
+ * Coefficients for erf approximation.
+ *
+ * Loosely based on http://www.netlib.org/specfun/erf
+ */
+ static final double ERFAPP_B[] = { 1.00000000000000000e00, 2.36012909523441209e01, 2.44024637934444173e02, 1.28261652607737228e03, 2.84423683343917062e03 };
+
+ /**
+ * Coefficients for erf approximation.
+ *
+ * Loosely based on http://www.netlib.org/specfun/erf
+ */
+ static final double ERFAPP_C[] = { 2.15311535474403846e-8, 5.64188496988670089e-1, 8.88314979438837594e00, 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02, 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725E03 };
+
+ /**
+ * Coefficients for erf approximation.
+ *
+ * Loosely based on http://www.netlib.org/specfun/erf
+ */
+ static final double ERFAPP_D[] = { 1.00000000000000000e00, 1.57449261107098347e01, 1.17693950891312499e02, 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03, 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03 };
+
+ /**
+ * Coefficients for erf approximation.
+ *
+ * Loosely based on http://www.netlib.org/specfun/erf
+ */
+ static final double ERFAPP_P[] = { 1.63153871373020978e-2, 3.05326634961232344e-1, 3.60344899949804439e-1, 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4 };
+
+ /**
+ * Coefficients for erf approximation.
+ *
+ * Loosely based on http://www.netlib.org/specfun/erf
+ */
+ static final double ERFAPP_Q[] = { 1.00000000000000000e00, 2.56852019228982242e00, 1.87295284992346047e00, 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3 };
+
+ /**
+ * Treshold for switching nethods for erfinv approximation
+ */
+ static final double P_LOW = 0.02425D;
+
+ /**
+ * Treshold for switching nethods for erfinv approximation
+ */
+ static final double P_HIGH = 1.0D - P_LOW;
+
+ /**
+ * Coefficients for erfinv approximation, rational version
+ */
+ static final double ERFINV_A[] = { -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 };
+
+ /**
+ * Coefficients for erfinv approximation, rational version
+ */
+ static final double ERFINV_B[] = { -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 };
+
+ /**
+ * Coefficients for erfinv approximation, rational version
+ */
+ static final double ERFINV_C[] = { -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 };
+
+ /**
+ * Coefficients for erfinv approximation, rational version
+ */
+ static final double ERFINV_D[] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 };
+
+ /**
+ * Mean value for the generator
+ */
+ private double mean;
+
+ /**
+ * Standard deviation
+ */
+ private double stddev;
+
+ /**
+ * The random generator.
+ */
+ private Random random;
+
+ /**
+ * Constructor for Gaussian distribution
+ *
+ * @param mean Mean
+ * @param stddev Standard Deviation
+ * @param random Random generator
+ */
+ public NormalDistribution(double mean, double stddev, Random random) {
+ super();
+ this.mean = mean;
+ this.stddev = stddev;
+ this.random = random;
+ }
+
+ /**
+ * Constructor for Gaussian distribution
+ *
+ * @param mean Mean
+ * @param stddev Standard Deviation
+ */
+ public NormalDistribution(double mean, double stddev) {
+ this(mean, stddev, new Random());
+ }
+
+ @Override
+ public double pdf(double val) {
+ return pdf(val, mean, stddev);
+ }
+
+ @Override
+ public double cdf(double val) {
+ return cdf(val, mean, stddev);
+ }
+
+ @Override
+ public double nextRandom() {
+ return mean + random.nextGaussian() * stddev;
+ }
+
+ @Override
+ public String toString() {
+ return "Normal Distribution (mean="+mean+", stddev="+stddev+")";
+ }
+
+ /**
+ * @return the mean
+ */
+ public double getMean() {
+ return mean;
+ }
+
+ /**
+ * @return the standard deviation
+ */
+ public double getStddev() {
+ return stddev;
+ }
+
+ /**
+ * Complementary error function for Gaussian distributions = Normal
+ * distributions.
+ *
+ * Numerical approximation using taylor series. Implementation loosely based
+ * on http://www.netlib.org/specfun/erf
+ *
+ * @param x parameter value
+ * @return erfc(x)
+ */
+ public static double erfc(double x) {
+ if(Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(Double.isInfinite(x)) {
+ return (x < 0.0) ? 2 : 0;
+ }
+
+ double result = Double.NaN;
+ double absx = Math.abs(x);
+ // First approximation interval
+ if(absx < 0.46875) {
+ double z = x * x;
+ result = 1 - x * ((((ERFAPP_A[0] * z + ERFAPP_A[1]) * z + ERFAPP_A[2]) * z + ERFAPP_A[3]) * z + ERFAPP_A[4]) / ((((ERFAPP_B[0] * z + ERFAPP_B[1]) * z + ERFAPP_B[2]) * z + ERFAPP_B[3]) * z + ERFAPP_B[4]);
+ }
+ // Second approximation interval
+ else if(absx < 4.0) {
+ double z = absx;
+ result = ((((((((ERFAPP_C[0] * z + ERFAPP_C[1]) * z + ERFAPP_C[2]) * z + ERFAPP_C[3]) * z + ERFAPP_C[4]) * z + ERFAPP_C[5]) * z + ERFAPP_C[6]) * z + ERFAPP_C[7]) * z + ERFAPP_C[8]) / ((((((((ERFAPP_D[0] * z + ERFAPP_D[1]) * z + ERFAPP_D[2]) * z + ERFAPP_D[3]) * z + ERFAPP_D[4]) * z + ERFAPP_D[5]) * z + ERFAPP_D[6]) * z + ERFAPP_D[7]) * z + ERFAPP_D[8]);
+ double rounded = Math.round(result * 16.0) / 16.0;
+ double del = (absx - rounded) * (absx + rounded);
+ result = Math.exp(-rounded * rounded) * Math.exp(-del) * result;
+ if(x < 0.0) {
+ result = 2.0 - result;
+ }
+ }
+ // Third approximation interval
+ else {
+ double z = 1.0 / (absx * absx);
+ result = z * (((((ERFAPP_P[0] * z + ERFAPP_P[1]) * z + ERFAPP_P[2]) * z + ERFAPP_P[3]) * z + ERFAPP_P[4]) * z + ERFAPP_P[5]) / (((((ERFAPP_Q[0] * z + ERFAPP_Q[1]) * z + ERFAPP_Q[2]) * z + ERFAPP_Q[3]) * z + ERFAPP_Q[4]) * z + ERFAPP_Q[5]);
+ result = (MathUtil.ONE_BY_SQRTPI - result) / absx;
+ double rounded = Math.round(result * 16.0) / 16.0;
+ double del = (absx - rounded) * (absx + rounded);
+ result = Math.exp(-rounded * rounded) * Math.exp(-del) * result;
+ if(x < 0.0) {
+ result = 2.0 - result;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Error function for Gaussian distributions = Normal distributions.
+ *
+ * Numerical approximation using taylor series. Implementation loosely based
+ * on http://www.netlib.org/specfun/erf
+ *
+ * @param x parameter value
+ * @return erf(x)
+ */
+ public static double erf(double x) {
+ return 1 - erfc(x);
+ }
+
+ /**
+ * Inverse error function.
+ *
+ * @param x parameter value
+ * @return erfinv(x)
+ */
+ public static double erfinv(double x) {
+ return standardNormalProbit(0.5 * (x + 1)) / MathUtil.SQRT2;
+ }
+
+ /**
+ * Approximate the inverse error function for normal distributions.
+ *
+ * Largely based on:
+ * <p>
+ * http://www.math.uio.no/~jacklam/notes/invnorm/index.html <br>
+ * by Peter John Acklam
+ * </p>
+ *
+ * @param d Quantile. Must be in [0:1], obviously.
+ * @return Inverse erf.
+ */
+ public static double standardNormalProbit(double d) {
+ if(d == 0) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ else if(d == 1) {
+ return Double.POSITIVE_INFINITY;
+ }
+ else if(Double.isNaN(d) || d < 0 || d > 1) {
+ return Double.NaN;
+ }
+ else if(d < P_LOW) {
+ // Rational approximation for lower region:
+ double q = Math.sqrt(-2 * Math.log(d));
+ return (((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1);
+ }
+ else if(P_HIGH < d) {
+ // Rational approximation for upper region:
+ double q = Math.sqrt(-2 * Math.log(1 - d));
+ return -(((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1);
+ }
+ else {
+ // Rational approximation for central region:
+ double q = d - 0.5D;
+ double r = q * q;
+ return (((((ERFINV_A[0] * r + ERFINV_A[1]) * r + ERFINV_A[2]) * r + ERFINV_A[3]) * r + ERFINV_A[4]) * r + ERFINV_A[5]) * q / (((((ERFINV_B[0] * r + ERFINV_B[1]) * r + ERFINV_B[2]) * r + ERFINV_B[3]) * r + ERFINV_B[4]) * r + 1);
+ }
+ }
+
+ /**
+ * Probability density function of the normal distribution.
+ *
+ * <pre>
+ * 1/(SQRT(2*pi*sigma^2)) * e^(-(x-mu)^2/2sigma^2)
+ * </pre>
+ *
+ * @param x The value.
+ * @param mu The mean.
+ * @param sigma The standard deviation.
+ * @return PDF of the given normal distribution at x.
+ */
+ public static double pdf(double x, double mu, double sigma) {
+ final double x_mu = x - mu;
+ final double sigmasq = sigma * sigma;
+ return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-1 * x_mu * x_mu / 2 / sigmasq);
+ }
+
+ /**
+ * Cumulative probability density function (CDF) of a normal distribution.
+ *
+ * @param x value to evaluate CDF at
+ * @param mu Mean value
+ * @param sigma Standard deviation.
+ * @return The CDF of the normal given distribution at x.
+ */
+ public static double cdf(double x, double mu, double sigma) {
+ return (1 + erf(x / Math.sqrt(2))) / 2;
+ }
+
+ /**
+ * Inverse cumulative probability density function (probit) of a normal
+ * distribution.
+ *
+ * @param x value to evaluate probit function at
+ * @param mu Mean value
+ * @param sigma Standard deviation.
+ * @return The probit of the normal given distribution at x.
+ */
+ public static double probit(double x, double mu, double sigma) {
+ return mu + sigma * standardNormalProbit(x);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
new file mode 100644
index 00000000..9571cfd3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
@@ -0,0 +1,132 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+/**
+ * Uniform distribution.
+ *
+ * @author Erich Schubert
+ */
+public class UniformDistribution implements Distribution {
+ /**
+ * Minimum
+ */
+ private double min;
+
+ /**
+ * Maximum
+ */
+ private double max;
+
+ /**
+ * Len := max - min
+ */
+ private double len;
+
+ /**
+ * The random generator.
+ */
+ private Random random;
+
+ /**
+ * Constructor for a uniform distribution on the interval [min, max[
+ *
+ * @param min Minimum value
+ * @param max Maximum value
+ * @param random Random generator
+ */
+ public UniformDistribution(double min, double max, Random random) {
+ super();
+ // Swap parameters if they were given incorrectly.
+ if(min > max) {
+ double tmp = min;
+ min = max;
+ max = tmp;
+ }
+ this.min = min;
+ this.max = max;
+ this.len = max - min;
+ this.random = random;
+ }
+
+ /**
+ * Constructor for a uniform distribution on the interval [min, max[
+ *
+ * @param min Minimum value
+ * @param max Maximum value
+ */
+ public UniformDistribution(double min, double max) {
+ this(min, max, new Random());
+ }
+
+ @Override
+ public double pdf(double val) {
+ if(val < min || val >= max) {
+ return 0.0;
+ }
+ return 1.0 / len;
+ }
+
+ @Override
+ public double cdf(double val) {
+ if(val < min) {
+ return 0.0;
+ }
+ if(val > max) {
+ return 1.0;
+ }
+ return (val - min) / len;
+ }
+
+ @Override
+ public double nextRandom() {
+ return min + random.nextDouble() * len;
+ }
+
+ /**
+ * Simple toString explaining the distribution parameters.
+ *
+ * Used in describing cluster models.
+ */
+ @Override
+ public String toString() {
+ return "Uniform Distribution (min=" + min + ", max=" + max + ")";
+ }
+
+ /**
+ * @return the minimum value
+ */
+ public double getMin() {
+ return min;
+ }
+
+ /**
+ * @return the maximum value
+ */
+ public double getMax() {
+ return max;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java
new file mode 100644
index 00000000..ed9c0e88
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>Standard distributions, with random generation functionalities.</p>
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.math.statistics.distribution; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java
index bd1195f3..142524e6 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team