path: root/src/de/lmu/ifi/dbs/elki/math
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math')
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/MathUtil.java | 21
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java | 3
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java | 418
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java | 13
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java | 13
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java | 1
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java | 13
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java | 146
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java | 60
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java | 25
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java | 3
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java | 12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/scales/Scales.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java | 264
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java | 124
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java | 499
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java | 17
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java | 23
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java | 7
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java | 20
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java | 496
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java | 26
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java | 411
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java | 90
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java | 14
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java | 49
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java | 117
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/tests/WelchTTest.java | 124
-rw-r--r--  src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java | 26
39 files changed, 2716 insertions, 386 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java
index 7eea5ed7..c44a0203 100644
--- a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
/**
* A collection of math related utility functions.
*
- * @author Arthur Zimekt
+ * @author Arthur Zimek
* @author Erich Schubert
*
* @apiviz.landmark
@@ -45,7 +45,7 @@ public final class MathUtil {
public static final double TWOPI = 2 * Math.PI;
/**
- * Squre root of two times Pi.
+ * Square root of two times Pi.
*/
public static final double SQRTTWOPI = Math.sqrt(TWOPI);
@@ -60,7 +60,7 @@ public final class MathUtil {
public static final double SQRT5 = Math.sqrt(5);
/**
- * Square root of 0.5 == 1 / Sqrt(2)
+ * Square root of 0.5 == 1 / sqrt(2)
*/
public static final double SQRTHALF = Math.sqrt(.5);
@@ -75,11 +75,26 @@ public final class MathUtil {
public static final double LOG2 = Math.log(2);
/**
+ * Natural logarithm of 10
+ */
+ public static final double LOG10 = Math.log(10);
+
+ /**
* Math.log(Math.PI)
*/
public static final double LOGPI = Math.log(Math.PI);
/**
+ * Math.log(Math.PI) / 2
+ */
+ public static final double LOGPIHALF = LOGPI / 2.;
+
+ /**
+ * Math.log(Math.sqrt(2*Math.PI))
+ */
+ public static final double LOGSQRTTWOPI = Math.log(SQRTTWOPI);
+
+ /**
* Fake constructor for static class.
*/
private MathUtil() {
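
The new log-space constants (LOG10, LOGPIHALF, LOGSQRTTWOPI) are the kind of values used when evaluating probability densities in log space to avoid underflow. A minimal sketch of such a use, with a hypothetical helper class that is not part of this patch:

public final class LogDensityExample {
  // log(sqrt(2 * pi)), the constant added to MathUtil above.
  private static final double LOGSQRTTWOPI = Math.log(Math.sqrt(2 * Math.PI));

  // Log-density of a normal distribution, computed in log space to avoid underflow.
  public static double logNormalPdf(double x, double mu, double sigma) {
    final double z = (x - mu) / sigma;
    return -0.5 * z * z - Math.log(sigma) - LOGSQRTTWOPI;
  }
}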
diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java
index 8d2eb0f6..35858c67 100644
--- a/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java
+++ b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java
@@ -35,6 +35,9 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
* Compute the alpha-Shape of a point set, using Delaunay triangulation.
*
* @author Erich Schubert
+ *
+ * @apiviz.uses SweepHullDelaunay2D
+ * @apiviz.has Polygon
*/
public class AlphaShape {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java b/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java
index 6d71bbf1..def48c52 100644
--- a/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java
+++ b/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java
@@ -40,6 +40,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* classic Grahams scan. Also computes a bounding box.
*
* @author Erich Schubert
+ *
+ * @apiviz.has Polygon
*/
@Reference(authors = "Paul Graham", title = "An Efficient Algorithm for Determining the Convex Hull of a Finite Planar Set", booktitle = "Information Processing Letters 1")
public class GrahamScanConvexHull2D {
diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java
index 2533a2b0..4f78cb76 100644
--- a/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java
+++ b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java
@@ -46,6 +46,8 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
* Note: This implementation does not check or handle duplicate points!
*
* @author Erich Schubert
+ *
+ * @apiviz.has Polygon
*/
@Reference(authors = "David Sinclair", title = "S-hull: a fast sweep-hull routine for Delaunay triangulation", booktitle = "Online: http://s-hull.org/")
public class SweepHullDelaunay2D {
@@ -680,6 +682,8 @@ public class SweepHullDelaunay2D {
* Class representing a triangle, by referencing points in a list.
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Triangle {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java b/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java
new file mode 100644
index 00000000..5d39de16
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java
@@ -0,0 +1,418 @@
+package de.lmu.ifi.dbs.elki.math.geometry;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.list.array.TDoubleArrayList;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+
+/**
+ * An XYCurve is an ordered collection of 2d points, meant for chart generation.
+ * Of key interest is the method {@link #addAndSimplify} which tries to simplify
+ * the curve while adding points.
+ *
+ * @author Erich Schubert
+ */
+public class XYCurve implements Result, TextWriteable {
+ /**
+ * Simplification threshold
+ */
+ protected static final double THRESHOLD = 1E-13;
+
+ /**
+ * X and Y positions
+ */
+ protected TDoubleArrayList data;
+
+ /**
+ * Label of X axis
+ */
+ protected String labelx;
+
+ /**
+ * Label of Y axis
+ */
+ protected String labely;
+
+ /**
+ * Minimum and maximum for X axis
+ */
+ protected double minx = Double.POSITIVE_INFINITY,
+ maxx = Double.NEGATIVE_INFINITY;
+
+ /**
+ * Minimum and maximum for Y axis
+ */
+ protected double miny = Double.POSITIVE_INFINITY,
+ maxy = Double.NEGATIVE_INFINITY;
+
+ /**
+ * Constructor with labels
+ *
+ * @param labelx Label for X axis
+ * @param labely Label for Y axis
+ */
+ public XYCurve(String labelx, String labely) {
+ super();
+ this.data = new TDoubleArrayList();
+ this.labelx = labelx;
+ this.labely = labely;
+ }
+
+ /**
+ * Constructor with size estimate and labels.
+ *
+ * @param labelx Label for X axis
+ * @param labely Label for Y axis
+ * @param size Estimated size (initial allocation size)
+ */
+ public XYCurve(String labelx, String labely, int size) {
+ super();
+ this.data = new TDoubleArrayList(size * 2);
+ this.labelx = labelx;
+ this.labely = labely;
+ }
+
+ /**
+ * Constructor.
+ */
+ public XYCurve() {
+ this("X", "Y");
+ }
+
+ /**
+ * Constructor with size estimate
+ *
+ * @param size Estimated size (initial allocation size)
+ */
+ public XYCurve(int size) {
+ this("X", "Y", size);
+ }
+
+ /**
+ * Constructor, cloning an existing curve.
+ *
+ * @param curve Curve to clone.
+ */
+ public XYCurve(XYCurve curve) {
+ super();
+ this.data = new TDoubleArrayList(curve.data);
+ this.labelx = curve.labelx;
+ this.labely = curve.labely;
+ this.minx = curve.minx;
+ this.maxx = curve.maxx;
+ this.miny = curve.miny;
+ this.maxy = curve.maxy;
+ }
+
+ /**
+ * Add a coordinate pair, but don't simplify
+ *
+ * @param x X coordinate
+ * @param y Y coordinate
+ */
+ public void add(double x, double y) {
+ data.add(x);
+ data.add(y);
+ minx = Math.min(minx, x);
+ maxx = Math.max(maxx, x);
+ miny = Math.min(miny, y);
+ maxy = Math.max(maxy, y);
+ }
+
+ /**
+ * Add a coordinate pair, performing curve simplification if possible.
+ *
+ * @param x X coordinate
+ * @param y Y coordinate
+ */
+ public void addAndSimplify(double x, double y) {
+ // simplify curve when possible:
+ final int len = data.size();
+ if(len >= 4) {
+ // Look at the previous 2 points
+ final double l1x = data.get(len - 4);
+ final double l1y = data.get(len - 3);
+ final double l2x = data.get(len - 2);
+ final double l2y = data.get(len - 1);
+ // Differences:
+ final double ldx = l2x - l1x;
+ final double ldy = l2y - l1y;
+ final double cdx = x - l2x;
+ final double cdy = y - l2y;
+ // X simplification
+ if((ldx == 0) && (cdx == 0)) {
+ data.remove(len - 2, 2);
+ }
+ // horizontal simplification
+ else if((ldy == 0) && (cdy == 0)) {
+ data.remove(len - 2, 2);
+ }
+ // diagonal simplification
+ else if(ldy > 0 && cdy > 0) {
+ if(Math.abs((ldx / ldy) - (cdx / cdy)) < THRESHOLD) {
+ data.remove(len - 2, 2);
+ }
+ }
+ }
+ add(x, y);
+ }
+
+ /**
+ * Get label of x axis
+ *
+ * @return label of x axis
+ */
+ public String getLabelx() {
+ return labelx;
+ }
+
+ /**
+ * Get label of y axis
+ *
+ * @return label of y axis
+ */
+ public String getLabely() {
+ return labely;
+ }
+
+ /**
+ * Minimum on x axis.
+ *
+ * @return Minimum on X
+ */
+ public double getMinx() {
+ return minx;
+ }
+
+ /**
+ * Maximum on x axis.
+ *
+ * @return Maximum on X
+ */
+ public double getMaxx() {
+ return maxx;
+ }
+
+ /**
+ * Minimum on y axis.
+ *
+ * @return Minimum on Y
+ */
+ public double getMiny() {
+ return miny;
+ }
+
+ /**
+ * Maximum on y axis.
+ *
+ * @return Maximum on Y
+ */
+ public double getMaxy() {
+ return maxy;
+ }
+
+ /**
+ * Curve X value at given position
+ *
+ * @param off Offset
+ * @return X value
+ */
+ public double getX(int off) {
+ return data.get(off << 1);
+ }
+
+ /**
+ * Curve Y value at given position
+ *
+ * @param off Offset
+ * @return Y value
+ */
+ public double getY(int off) {
+ return data.get((off << 1) + 1);
+ }
+
+ /**
+ * Size of curve.
+ *
+ * @return curve length
+ */
+ public int size() {
+ return data.size() >> 1;
+ }
+
+ /**
+ * Get an iterator for the curve.
+ *
+ * Note: this is <em>not</em> a Java style iterator, since the only way to get
+ * positions is using "next" in Java style. Here, we can have two getters for
+ * current values!
+ *
+ * Instead, use this style of iteration: <blockquote>
+ *
+ * <pre>
+ * {@code
+ * for (XYCurve.Itr it = curve.iterator(); it.valid(); it.advance()) {
+ * doSomethingWith(it.getX(), it.getY());
+ * }
+ * }
+ * </pre>
+ *
+ * </blockquote>
+ *
+ * @return Iterator
+ */
+ public Itr iterator() {
+ return new Itr();
+ }
+
+ @Override
+ public void writeToText(TextWriterStream out, String label) {
+ out.commentPrint(labelx);
+ out.commentPrint(" ");
+ out.commentPrint(labely);
+ out.flush();
+ for(int pos = 0; pos < data.size(); pos+=2) {
+ out.inlinePrint(data.get(pos));
+ out.inlinePrint(data.get(pos + 1));
+ out.flush();
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuffer buf = new StringBuffer();
+ buf.append("XYCurve[");
+ buf.append(labelx).append(",").append(labely).append(":");
+ for(int pos = 0; pos < data.size(); pos += 2) {
+ buf.append(" ").append(data.get(pos)).append(",").append(data.get(pos + 1));
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+
+ @Override
+ public String getLongName() {
+ return labelx + "-" + labely + "-Curve";
+ }
+
+ @Override
+ public String getShortName() {
+ return (labelx + "-" + labely + "-curve").toLowerCase();
+ }
+
+ /**
+ * Compute the area under curve for a curve
+ * <em>monotonously increasing in X</em>. You might need to relate this to the
+ * total area of the chart.
+ *
+ * @param curve Curve
+ * @return Area under curve.
+ */
+ public static double areaUnderCurve(XYCurve curve) {
+ TDoubleArrayList data = curve.data;
+ double prevx = data.get(0), prevy = data.get(1);
+ if(prevx > curve.minx) {
+ throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid.");
+ }
+ double area = 0.0;
+ for(int pos = 2; pos < data.size(); pos += 2) {
+ final double curx = data.get(pos), cury = data.get(pos + 1);
+ if(prevx > curx) {
+ throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid.");
+ }
+ area += (curx - prevx) * (prevy + cury) * .5; // .5 = mean Y
+ prevx = curx;
+ prevy = cury;
+ }
+ if(prevx < curve.maxx) {
+ throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid.");
+ }
+ return area;
+ }
+
+ /**
+ * Iterator for the curve. 2D, does not follow Java collections style. The
+ * reason is that we want to have {@code #getX()} and {@code #getY()}
+ * operations, which does not work consistently with Java's
+ * <code>next()</code> style of iterations.
+ *
+ * Instead, use this style of iteration: <blockquote>
+ *
+ * <pre>
+ * {@code
+ * for (XYCurve.Itr it = curve.iterator(); it.valid(); it.advance()) {
+ * doSomethingWith(it.getX(), it.getY());
+ * }
+ * }
+ * </pre>
+ *
+ * </blockquote>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public class Itr {
+ /**
+ * Iterator position
+ */
+ protected int pos = 0;
+
+ /**
+ * Get x value of current element.
+ *
+ * @return X value of current element
+ */
+ public double getX() {
+ return data.get(pos);
+ }
+
+ /**
+ * Get y value of current element.
+ *
+ * @return Y value of current element
+ */
+ public double getY() {
+ return data.get(pos + 1);
+ }
+
+ /**
+ * Advance the iterator to the next position.
+ */
+ public void advance() {
+ pos += 2;
+ }
+
+ /**
+ * Test if the iterator can advance.
+ *
+ * @return True when the iterator can be advanced.
+ */
+ public boolean valid() {
+ return pos < data.size();
+ }
+ }
+}
\ No newline at end of file
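
The XYCurve API added above is small: add() or addAndSimplify() to append points, valid()/advance() style iteration, and areaUnderCurve() for curves that are monotone increasing in X. A minimal usage sketch with illustrative labels and values (not taken from this patch):

XYCurve curve = new XYCurve("Recall", "Precision", 100);
curve.addAndSimplify(0.0, 1.0);
curve.addAndSimplify(0.5, 1.0);
curve.addAndSimplify(1.0, 1.0); // horizontal run: the middle point (0.5, 1.0) is dropped
curve.addAndSimplify(1.0, 0.5); // vertical step at x = 1.0

// Iteration uses the valid()/advance() style, not java.util.Iterator:
for(XYCurve.Itr it = curve.iterator(); it.valid(); it.advance()) {
  System.out.println(it.getX() + " " + it.getY());
}

// The curve above is monotone (non-decreasing) in X, so the trapezoid area is defined:
double auc = XYCurve.areaUnderCurve(curve); // 1.0 for these points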
diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java
index baa9eb54..87b93910 100644
--- a/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @author Erich Schubert
*
- * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.AggregatingHistogram.Adapter
+ * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram.Adapter
*
* @param <T> Type of data in histogram
* @param <D> Type of input data
diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java
index cee151ed..07d20dd2 100644
--- a/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @author Erich Schubert
*
- * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.FlexiHistogram.Adapter
+ * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram.Adapter
*
* @param <T> Type of data in histogram
* @param <D> Type of input data
diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java
index 304a434b..515fa016 100644
--- a/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* @author Erich Schubert
*
- * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.ReplacingHistogram.Adapter
+ * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.histograms.ReplacingHistogram.Adapter
*
* @param <T> Histogram data type.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java
index cf3bb25b..a9939a56 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java
@@ -24,7 +24,8 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra;
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
@@ -168,8 +169,8 @@ public class Centroid extends Vector {
*/
public static Centroid make(Relation<? extends NumberVector<?, ?>> relation) {
Centroid c = new Centroid(DatabaseUtil.dimensionality(relation));
- for(DBID id : relation.iterDBIDs()) {
- c.put(relation.get(id));
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ c.put(relation.get(iditer));
}
return c;
}
@@ -180,10 +181,10 @@ public class Centroid extends Vector {
* @param relation Relation to use
* @param ids IDs to use
*/
- public static Centroid make(Relation<? extends NumberVector<?, ?>> relation, Iterable<DBID> ids) {
+ public static Centroid make(Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) {
Centroid c = new Centroid(DatabaseUtil.dimensionality(relation));
- for(DBID id : ids) {
- c.put(relation.get(id));
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ c.put(relation.get(iter));
}
return c;
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java
index 5d9d27a1..a416b917 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java
@@ -24,7 +24,8 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra;
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
@@ -344,8 +345,8 @@ public class CovarianceMatrix {
*/
public static CovarianceMatrix make(Relation<? extends NumberVector<?, ?>> relation) {
CovarianceMatrix c = new CovarianceMatrix(DatabaseUtil.dimensionality(relation));
- for(DBID id : relation.iterDBIDs()) {
- c.put(relation.get(id));
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ c.put(relation.get(iditer));
}
return c;
}
@@ -356,10 +357,10 @@ public class CovarianceMatrix {
* @param relation Relation to use.
* @param ids IDs to add
*/
- public static CovarianceMatrix make(Relation<? extends NumberVector<?, ?>> relation, Iterable<DBID> ids) {
+ public static CovarianceMatrix make(Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) {
CovarianceMatrix c = new CovarianceMatrix(DatabaseUtil.dimensionality(relation));
- for(DBID id : ids) {
- c.put(relation.get(id));
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ c.put(relation.get(iter));
}
return c;
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java
index b6a53aa2..80cbe1e6 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java
@@ -733,7 +733,7 @@ public class LinearEquationSystem {
if(value < 10) {
return 1;
}
- return (int) (Math.log(value) / Math.log(10) + 1);
+ return (int) Math.log10(value) + 1;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java
index f64b1129..eec21404 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java
@@ -45,7 +45,6 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
* @author Elke Achtert
* @author Erich Schubert
*
- * @apiviz.uses MatrixLike oneway - - reads
* @apiviz.uses Vector
* @apiviz.landmark
*/
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java
index 02b5b424..435ac3d7 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java
@@ -26,7 +26,8 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra;
import java.util.BitSet;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
@@ -134,8 +135,8 @@ public class ProjectedCentroid extends Centroid {
public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?, ?>> relation) {
ProjectedCentroid c = new ProjectedCentroid(dims, DatabaseUtil.dimensionality(relation));
assert (dims.size() <= DatabaseUtil.dimensionality(relation));
- for(DBID id : relation.iterDBIDs()) {
- c.put(relation.get(id));
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ c.put(relation.get(iditer));
}
return c;
}
@@ -147,11 +148,11 @@ public class ProjectedCentroid extends Centroid {
* @param relation Relation to process
* @param ids IDs to process
*/
- public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?, ?>> relation, Iterable<DBID> ids) {
+ public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) {
ProjectedCentroid c = new ProjectedCentroid(dims, DatabaseUtil.dimensionality(relation));
assert (dims.size() <= DatabaseUtil.dimensionality(relation));
- for(DBID id : ids) {
- c.put(relation.get(id));
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ c.put(relation.get(iter));
}
return c;
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java
new file mode 100644
index 00000000..1657a096
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java
@@ -0,0 +1,146 @@
+package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * The "drop" filter looks for the largest drop in normalized relative
+ * eigenvalues.
+ *
+ * Let s_1 .. s_n be the eigenvalues.
+ *
+ * Let a_k := 1/(n-k) sum_{i=k..n} s_i
+ *
+ * Then r_k := s_k / a_k is the relative eigenvalue.
+ *
+ * The drop filter searches for argmax_k r_k / r_{k+1}
+ *
+ * @author Erich Schubert
+ */
+@Title("Drop EigenPair Filter")
+public class DropEigenPairFilter implements EigenPairFilter {
+ /**
+ * The default value for walpha. Not used by default, we're going for maximum
+ * contrast only.
+ */
+ public static final double DEFAULT_WALPHA = 0.0;
+
+ /**
+ * The noise tolerance level for weak eigenvectors
+ */
+ private double walpha = 0.0;
+
+ /**
+ * Constructor.
+ *
+ * @param walpha
+ */
+ public DropEigenPairFilter(double walpha) {
+ super();
+ this.walpha = walpha;
+ }
+
+ @Override
+ public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
+ // init strong and weak eigenpairs
+ List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>();
+ List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>();
+
+ // default value is "all strong".
+ int contrastMaximum = eigenPairs.size() - 1;
+ double maxContrast = 0.0;
+
+ double[] ev = eigenPairs.eigenValues();
+ // calc the eigenvalue sum.
+ double eigenValueSum = 0.0;
+ for(int i = 0; i < ev.length; i++) {
+ eigenValueSum += ev[i];
+ }
+ // Minimum value
+ final double weakEigenvalue = walpha * eigenValueSum / ev.length;
+ // Now find the maximum contrast, scanning backwards.
+ double prev_sum = ev[ev.length - 1];
+ double prev_rel = 1.0;
+ for(int i = 2; i <= ev.length; i++) {
+ double curr_sum = prev_sum + ev[ev.length - i];
+ double curr_rel = ev[ev.length - i] / curr_sum * i;
+ // not too weak?
+ if(ev[ev.length - i] >= weakEigenvalue) {
+ double contrast = curr_rel - prev_rel;
+ if(contrast > maxContrast) {
+ maxContrast = contrast;
+ contrastMaximum = ev.length - i;
+ }
+ }
+ prev_sum = curr_sum;
+ prev_rel = curr_rel;
+ }
+
+ for(int i = 0; i <= contrastMaximum; i++) {
+ EigenPair eigenPair = eigenPairs.getEigenPair(i);
+ strongEigenPairs.add(eigenPair);
+ }
+ for(int i = contrastMaximum + 1; i < eigenPairs.size(); i++) {
+ EigenPair eigenPair = eigenPairs.getEigenPair(i);
+ weakEigenPairs.add(eigenPair);
+ }
+
+ return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs);
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ private double walpha;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA);
+ if(config.grab(walphaP)) {
+ walpha = walphaP.getValue();
+ }
+ }
+
+ @Override
+ protected DropEigenPairFilter makeInstance() {
+ return new DropEigenPairFilter(walpha);
+ }
+ }
+}
\ No newline at end of file
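
With the default walpha = 0.0, the filter keeps all eigenvectors up to the position of the largest increase in relative eigenvalue when scanning from the weakest eigenvalue upward. A hypothetical standalone sketch of that criterion, mirroring the filter() loop above with the weak-eigenvalue threshold ignored (the array values are made up for illustration):

double[] ev = { 4.0, 3.5, 3.0, 0.2, 0.1 }; // eigenvalues in decreasing order, clear drop after the third
int contrastMaximum = ev.length - 1;       // default: all eigenvectors strong
double maxContrast = 0.0;
double prevSum = ev[ev.length - 1];
double prevRel = 1.0;
for(int i = 2; i <= ev.length; i++) {
  double currSum = prevSum + ev[ev.length - i];
  double currRel = ev[ev.length - i] / currSum * i; // r_k = s_k / (mean of s_k .. s_n)
  double contrast = currRel - prevRel;
  if(contrast > maxContrast) {
    maxContrast = contrast;
    contrastMaximum = ev.length - i; // last index treated as "strong"
  }
  prevSum = currSum;
  prevRel = currRel;
}
// Here contrastMaximum == 2, i.e. the first three eigenvectors are kept as strong.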
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java
index a2c83249..94c00a7e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java
@@ -31,9 +31,9 @@ import java.util.List;
* Encapsulates weak and strong eigenpairs that have been filtered out
* by an eigenpair filter.
*
- * @author Elke Achtert
+ * @author Elke Achtert
*
- * @apiviz.has de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair
+ * @apiviz.has EigenPair
*/
public class FilteredEigenPairs {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java
index 8b53dc43..157dfedc 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java
@@ -32,6 +32,7 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
@@ -76,7 +77,8 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>>
// be L2-spherical to be unbiased.
V center = DatabaseUtil.centroid(database, ids);
List<DoubleDistanceResultPair> dres = new ArrayList<DoubleDistanceResultPair>(ids.size());
- for(DBID id : ids) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ DBID id = iter.getDBID();
final double dist = EuclideanDistanceFunction.STATIC.doubleDistance(center, database.get(id));
dres.add(new DoubleDistanceResultPair(dist, id));
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java
index 4aa626a9..3e79e7b6 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java
@@ -30,7 +30,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectionResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
-import de.lmu.ifi.dbs.elki.utilities.Util;
/**
* Result class for a filtered PCA. This differs from regular PCA by having the
@@ -89,10 +88,9 @@ public class PCAFilteredResult extends PCAResult implements ProjectionResult {
private Matrix m_czech;
/**
- * The diagonal matrix of adapted strong eigenvalues: eigenvectors *
- * e_czech.
+ * The diagonal matrix of adapted strong eigenvalues: eigenvectors * e_czech.
*/
- private Matrix adapatedStrongEigenvectors;
+ private Matrix adapatedStrongEigenvectors = null;
/**
* Construct a result object for the filtered PCA result.
@@ -152,51 +150,49 @@ public class PCAFilteredResult extends PCAResult implements ProjectionResult {
}
}
- // TODO: unnecessary copy.
Matrix V = getEigenvectors();
- adapatedStrongEigenvectors = V.times(e_czech).times(Matrix.identity(dim, localdim));
m_hat = V.times(e_hat).timesTranspose(V);
m_czech = V.times(e_czech).timesTranspose(V);
}
/**
- * Returns a copy of the matrix of strong eigenvectors after passing the eigen
- * pair filter.
+ * Returns the matrix of strong eigenvectors after passing the eigen pair
+ * filter.
*
* @return the matrix of eigenvectors
*/
public final Matrix getStrongEigenvectors() {
- return strongEigenvectors.copy();
+ return strongEigenvectors;
}
/**
- * Returns a copy of the strong eigenvalues of the object after passing the
- * eigen pair filter.
+ * Returns the strong eigenvalues of the object after passing the eigen pair
+ * filter.
*
* @return the eigenvalues
*/
public final double[] getStrongEigenvalues() {
- return Util.copy(strongEigenvalues);
+ return strongEigenvalues;
}
/**
- * Returns a copy of the matrix of weak eigenvectors after passing the eigen
- * pair filter.
+ * Returns the matrix of weak eigenvectors after passing the eigen pair
+ * filter.
*
* @return the matrix of eigenvectors
*/
public final Matrix getWeakEigenvectors() {
- return weakEigenvectors.copy();
+ return weakEigenvectors;
}
/**
- * Returns a copy of the weak eigenvalues of the object after passing the
- * eigen pair filter.
+ * Returns the weak eigenvalues of the object after passing the eigen pair
+ * filter.
*
* @return the eigenvalues
*/
public final double[] getWeakEigenvalues() {
- return Util.copy(weakEigenvalues);
+ return weakEigenvalues;
}
/**
@@ -219,50 +215,54 @@ public class PCAFilteredResult extends PCAResult implements ProjectionResult {
}
/**
- * Returns a copy of the selection matrix of the weak eigenvectors (E_hat) of
+ * Returns the selection matrix of the weak eigenvectors (E_hat) of
* the object to which this PCA belongs to.
*
* @return the selection matrix of the weak eigenvectors E_hat
*/
public Matrix selectionMatrixOfWeakEigenvectors() {
- return e_hat.copy();
+ return e_hat;
}
/**
- * Returns a copy of the selection matrix of the strong eigenvectors (E_czech)
+ * Returns the selection matrix of the strong eigenvectors (E_czech)
* of this LocalPCA.
*
* @return the selection matrix of the weak eigenvectors E_czech
*/
public Matrix selectionMatrixOfStrongEigenvectors() {
- return e_czech.copy();
+ return e_czech;
}
/**
- * Returns a copy of the similarity matrix (M_hat) of this LocalPCA.
+ * Returns the similarity matrix (M_hat) of this LocalPCA.
*
* @return the similarity matrix M_hat
*/
@Override
public Matrix similarityMatrix() {
- return m_hat.copy();
+ return m_hat;
}
/**
- * Returns a copy of the dissimilarity matrix (M_czech) of this LocalPCA.
+ * Returns the dissimilarity matrix (M_czech) of this LocalPCA.
*
* @return the dissimilarity matrix M_hat
*/
public Matrix dissimilarityMatrix() {
- return m_czech.copy();
+ return m_czech;
}
-
+
/**
- * Returns a copy of the adapted strong eigenvectors.
- *
+ * Returns the adapted strong eigenvectors.
+ *
* @return the adapted strong eigenvectors
*/
public Matrix adapatedStrongEigenvectors() {
- return adapatedStrongEigenvectors.copy();
+ if (adapatedStrongEigenvectors == null) {
+ final Matrix ev = getEigenvectors();
+ adapatedStrongEigenvectors = ev.times(e_czech).times(Matrix.identity(ev.getRowDimensionality(), strongEigenvalues.length));
+ }
+ return adapatedStrongEigenvectors;
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java
index 6969a3a3..87518b1d 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
-import de.lmu.ifi.dbs.elki.utilities.Util;
/**
* Result class for Principal Component Analysis with some convenience methods
@@ -33,14 +32,14 @@ import de.lmu.ifi.dbs.elki.utilities.Util;
* @author Erich Schubert
*
* @apiviz.landmark
- * @apiviz.has de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs
+ * @apiviz.has SortedEigenPairs
*/
public class PCAResult {
/**
* The eigenpairs in decreasing order.
*/
private SortedEigenPairs eigenPairs;
-
+
/**
* The eigenvalues in decreasing order.
*/
@@ -73,44 +72,46 @@ public class PCAResult {
*/
public PCAResult(SortedEigenPairs eigenPairs) {
super();
- // TODO: we might want to postpone the instantiation of eigenvalue and eigenvectors.
+ // TODO: we might want to postpone the instantiation of eigenvalue and
+ // eigenvectors.
this.eigenPairs = eigenPairs;
this.eigenvalues = eigenPairs.eigenValues();
this.eigenvectors = eigenPairs.eigenVectors();
}
/**
- * Returns a copy of the matrix of eigenvectors of the object to which this
- * PCA belongs to.
+ * Returns the matrix of eigenvectors of the object to which this PCA belongs
+ * to.
*
* @return the matrix of eigenvectors
*/
public final Matrix getEigenvectors() {
- return eigenvectors.copy();
+ return eigenvectors;
}
/**
- * Returns a copy of the eigenvalues of the object to which this PCA belongs
- * to in decreasing order.
+ * Returns the eigenvalues of the object to which this PCA belongs to in
+ * decreasing order.
*
* @return the eigenvalues
*/
public final double[] getEigenvalues() {
- return Util.copy(eigenvalues);
+ return eigenvalues;
}
/**
- * Returns a copy of the eigenpairs of the object to which this PCA belongs to
+ * Returns the eigenpairs of the object to which this PCA belongs to
* in decreasing order.
*
* @return the eigenpairs
*/
public final SortedEigenPairs getEigenPairs() {
- return eigenPairs.copy();
+ return eigenPairs;
}
/**
* Returns the number of eigenvectors stored
+ *
* @return length
*/
public final int length() {
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java
index 94636553..ab04cbb5 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java
@@ -76,9 +76,6 @@ public class SignificantEigenPairFilter implements EigenPairFilter {
this.walpha = walpha;
}
- /**
- * Filter eigenpairs
- */
@Override
public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
// init strong and weak eigenpairs
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java
index db5e8702..fb7f60c3 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java
@@ -27,7 +27,7 @@ import java.util.Collection;
import java.util.Iterator;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
@@ -123,8 +123,8 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V,
double maxdist = 0.0;
double stddev = 0.0;
{
- for(Iterator<DBID> it = ids.iterator(); it.hasNext();) {
- V obj = database.get(it.next());
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ V obj = database.get(iter);
double distance = weightDistance.distance(centroid, obj).doubleValue();
stddev += distance * distance;
if(distance > maxdist) {
@@ -138,8 +138,8 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V,
stddev = Math.sqrt(stddev / ids.size());
}
- for(Iterator<DBID> it = ids.iterator(); it.hasNext();) {
- V obj = database.get(it.next());
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ V obj = database.get(iter);
double distance = weightDistance.distance(centroid, obj).doubleValue();
double weight = weightfunction.getWeight(distance, maxdist, stddev);
cmat.put(obj, weight);
@@ -204,7 +204,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V,
dist = res.getDistance().doubleValue();
}
- V obj = database.get(res.getDBID());
+ V obj = database.get(res);
double weight = weightfunction.getWeight(dist, maxdist, stddev);
cmat.put(obj, weight);
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java
index ffaaffc7..edad211d 100644
--- a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java
+++ b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java
@@ -24,7 +24,7 @@ package de.lmu.ifi.dbs.elki.math.scales;
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
@@ -57,8 +57,8 @@ public class Scales {
LinearScale scales[] = new LinearScale[dim];
// analyze data
- for(DBID objId : db.iterDBIDs()) {
- O v = db.get(objId);
+ for(DBIDIter iditer = db.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ O v = db.get(iditer);
for(int d = 0; d < dim; d++) {
minmax[d].put(v.doubleValue(d+1));
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java
index 82e41337..9b4af341 100644
--- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java
@@ -45,7 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
*
* @author Erich Schubert
*
- * @apiviz.uses HilbertRef
+ * @apiviz.composedOf HilbertRef
*/
@Reference(authors = "D. Hilbert", title = "Über die stetige Abbildung einer Linie auf ein Flächenstück", booktitle = "Mathematische Annalen, 38(3)")
public class HilbertSpatialSorter extends AbstractSpatialSorter {
diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java
deleted file mode 100644
index 38f45aef..00000000
--- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java
+++ /dev/null
@@ -1,264 +0,0 @@
-package de.lmu.ifi.dbs.elki.math.spacefillingcurves;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
-
-/**
- * Computes the z-values for specified double values.
- *
- * @author Elke Achtert
- */
-public class ZCurve {
- /**
- * The logger of this class.
- */
- private static final Logging logger = Logging.getLogger(ZCurve.class);
-
- /**
- * Fake Constructor - use the static methods instead!
- */
- private ZCurve() {
- // nothing to do.
- }
-
- /**
- * Computes the z-values for the specified double values.
- *
- * @param valuesList the list of double values
- * @return the z-values for the specified double values
- */
- public static List<byte[]> zValues(List<double[]> valuesList) {
- if(valuesList.isEmpty()) {
- return new ArrayList<byte[]>();
- }
-
- // determine min and max value in each dimension and the scaling factor
- int dimensionality = valuesList.get(0).length;
- double[] minValues = new double[dimensionality];
- double[] maxValues = new double[dimensionality];
- Arrays.fill(minValues, Double.POSITIVE_INFINITY);
- Arrays.fill(maxValues, Double.NEGATIVE_INFINITY);
- for(double[] values : valuesList) {
- for(int d = 0; d < dimensionality; d++) {
- maxValues[d] = Math.max(values[d], maxValues[d]);
- minValues[d] = Math.min(values[d], minValues[d]);
- }
- }
-
- double[] normalizationFactors = new double[dimensionality];
- for(int d = 0; d < dimensionality; d++) {
- // has to be > 0!!!
- normalizationFactors[d] = (maxValues[d] - minValues[d]);
- }
-
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("min ").append(FormatUtil.format(minValues));
- msg.append("\nmax ").append(FormatUtil.format(maxValues));
- msg.append("\nscale ").append(FormatUtil.format(normalizationFactors));
- msg.append("\nLong.MAX_VALUE " + Long.MAX_VALUE);
- msg.append("\nLong.MIN_VALUE " + Long.MIN_VALUE);
- logger.debugFine(msg.toString());
- }
-
- // discretize the double value over the whole domain
- final List<byte[]> zValues = new ArrayList<byte[]>();
- for(double[] values : valuesList) {
- // convert the double values to long values
- long[] longValues = new long[values.length];
- for(int d = 0; d < values.length; d++) {
- // normalize to 0:1
- final double normval = ((values[d] - minValues[d]) / normalizationFactors[d]);
- longValues[d] = (long) (normval * Long.MAX_VALUE);
- }
-
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("double values ").append(FormatUtil.format(values));
- msg.append("\nlong values ").append(FormatUtil.format(longValues));
- logger.debugFine(msg.toString());
- }
- byte[] zValue = zValue(longValues);
- zValues.add(zValue);
- }
-
- return zValues;
- }
-
- /**
- * Computes the z-value for the specified long values
- *
- * @param longValues the array of the discretized double values
- * @return the z-value for the specified long values
- */
- private static byte[] zValue(long[] longValues) {
- final int numdim = longValues.length;
- final int numbits = numdim * 64;
- byte[] zValues = new byte[numbits / 8];
-
- // convert longValues into zValues
- for(int bitnum = 0; bitnum < numbits; bitnum ++) {
- // split into in-byte and in-array indexes
- final int lowpos = bitnum & 7;
- final int higpos = bitnum >> 3;
-
- final int dim = bitnum % numdim;
- final int shift = 63 - (bitnum / numdim);
- final byte val = (byte) ((longValues[dim] >> shift) & 1);
-
- zValues[higpos] |= val << (7 - lowpos);
- }
- /*for(int shift = 0; shift < 64; shift++) {
- for(int dim = 0; dim < longValues.length; dim++) {
- // destination bit position
- final int bitpos = shift * longValues.length + dim;
- // split into in-byte and in-array indexes
- final int lowpos = bitpos & 7;
- final int higpos = bitpos >> 3;
- zValues[higpos] |= ((longValues[dim] >> (shift)) & 1) << lowpos;
- }
- }*/
-
- if(logger.isDebugging()) {
- // convert zValues to longValues
- long[] loutput = new long[longValues.length];
- for(int shift = 0; shift < 64; shift++) {
- for(int dim = 0; dim < longValues.length; dim++) {
- // destination bit position
- final int bitpos = shift * longValues.length + dim;
- // split into in-byte and in-array indexes
- final int lowpos = bitpos & 7;
- final int higpos = bitpos >> 3;
- loutput[dim] |= ((long) (((zValues[higpos] >> (lowpos)) & 1))) << (shift);
- }
- }
- StringBuffer msg = new StringBuffer();
- msg.append("reconstructed values: ").append(FormatUtil.format(loutput));
- logger.debugFine(msg.toString());
- }
-
- return zValues;
- }
-
- /**
- * Class to transform a relation to its Z coordinates.
- *
- * @author Erich Schubert
- */
- public static class Transformer {
- /**
- * Maximum values in each dimension
- */
- private final double[] maxValues;
-
- /**
- * Minimum values in each dimension
- */
- private final double[] minValues;
-
- /**
- * Dimensionality
- */
- private final int dimensionality;
-
- /**
- * Constructor.
- *
- * @param relation Relation to transform
- * @param ids IDs subset to process
- */
- public Transformer(Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) {
- this.dimensionality = DatabaseUtil.dimensionality(relation);
- this.minValues = new double[dimensionality];
- this.maxValues = new double[dimensionality];
-
- // Compute scaling of vector space
- Arrays.fill(minValues, Double.POSITIVE_INFINITY);
- Arrays.fill(maxValues, Double.NEGATIVE_INFINITY);
- for(DBID id : ids) {
- NumberVector<?, ?> vector = relation.get(id);
- for(int dim = 0; dim < dimensionality; ++dim) {
- double dimValue = vector.doubleValue(dim + 1);
- minValues[dim] = Math.min(minValues[dim], dimValue);
- maxValues[dim] = Math.max(maxValues[dim], dimValue);
- }
- }
- }
-
- /**
- * Transform a single vector.
- *
- * @param vector Vector to transform
- * @return Z curve value as bigint
- */
- public BigInteger asBigInteger(NumberVector<?, ?> vector) {
- return new BigInteger(asByteArray(vector));
- }
-
- /**
- * Transform a single vector.
- *
- * @param vector Vector to transform
- * @return Z curve value as byte array
- */
- public byte[] asByteArray(NumberVector<?, ?> vector) {
- final long[] longValueList = new long[dimensionality];
-
- for(int dim = 0; dim < dimensionality; ++dim) {
- final double minValue = minValues[dim];
- final double maxValue = maxValues[dim];
- double dimValue = vector.doubleValue(dim + 1);
-
- dimValue = (dimValue - minValue) / (maxValue - minValue);
- longValueList[dim] = (long) (dimValue * (Long.MAX_VALUE));
- }
-
- final byte[] bytes = new byte[Long.SIZE * dimensionality * (Long.SIZE / Byte.SIZE)];
- int shiftCounter = 0;
- for(int i = 0; i < Long.SIZE; ++i) {
- for(int dim = 0; dim < dimensionality; ++dim) {
- long byteValue = longValueList[dim];
-
- int localShift = shiftCounter % Byte.SIZE;
- bytes[(bytes.length - 1) - (shiftCounter / Byte.SIZE)] |= ((byteValue >> i) & 0x01) << localShift;
-
- shiftCounter++;
- }
- }
- return bytes;
- }
- }
-}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java
new file mode 100644
index 00000000..108721eb
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java
@@ -0,0 +1,124 @@
+package de.lmu.ifi.dbs.elki.math.spacefillingcurves;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+
+/**
+ * Class to transform a relation to its Z coordinates.
+ *
+ * @author Erich Schubert
+ */
+public class ZCurveTransformer {
+ /**
+ * Maximum values in each dimension
+ */
+ private final double[] maxValues;
+
+ /**
+ * Minimum values in each dimension
+ */
+ private final double[] minValues;
+
+ /**
+ * Dimensionality
+ */
+ private final int dimensionality;
+
+ /**
+ * Constructor.
+ *
+ * @param relation Relation to transform
+ * @param ids IDs subset to process
+ */
+ public ZCurveTransformer(Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) {
+ this.dimensionality = DatabaseUtil.dimensionality(relation);
+ this.minValues = new double[dimensionality];
+ this.maxValues = new double[dimensionality];
+
+ // Compute scaling of vector space
+ Arrays.fill(minValues, Double.POSITIVE_INFINITY);
+ Arrays.fill(maxValues, Double.NEGATIVE_INFINITY);
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ NumberVector<?, ?> vector = relation.get(iter);
+ for(int dim = 0; dim < dimensionality; ++dim) {
+ double dimValue = vector.doubleValue(dim + 1);
+ minValues[dim] = Math.min(minValues[dim], dimValue);
+ maxValues[dim] = Math.max(maxValues[dim], dimValue);
+ }
+ }
+ }
+
+ /**
+ * Transform a single vector.
+ *
+ * @param vector Vector to transform
+ * @return Z curve value as bigint
+ */
+ @Deprecated
+ public BigInteger asBigInteger(NumberVector<?, ?> vector) {
+ return new BigInteger(asByteArray(vector));
+ }
+
+ /**
+ * Transform a single vector.
+ *
+ * @param vector Vector to transform
+ * @return Z curve value as byte array
+ */
+ public byte[] asByteArray(NumberVector<?, ?> vector) {
+ final long[] longValueList = new long[dimensionality];
+
+ for(int dim = 0; dim < dimensionality; ++dim) {
+ final double minValue = minValues[dim];
+ final double maxValue = maxValues[dim];
+ double dimValue = vector.doubleValue(dim + 1);
+
+ dimValue = (dimValue - minValue) / (maxValue - minValue);
+ longValueList[dim] = (long) (dimValue * (Long.MAX_VALUE));
+ }
+
+ final byte[] bytes = new byte[Long.SIZE * dimensionality * (Long.SIZE / Byte.SIZE)];
+ int shiftCounter = 0;
+ for(int i = 0; i < Long.SIZE; ++i) {
+ for(int dim = 0; dim < dimensionality; ++dim) {
+ long byteValue = longValueList[dim];
+
+ int localShift = shiftCounter % Byte.SIZE;
+ bytes[(bytes.length - 1) - (shiftCounter / Byte.SIZE)] |= ((byteValue >> i) & 0x01) << localShift;
+
+ shiftCounter++;
+ }
+ }
+ return bytes;
+ }
+
+}
\ No newline at end of file
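
ZCurveTransformer normalizes each dimension to [0, 1] using the minima and maxima collected in the constructor, scales the result to 64-bit integers, and interleaves the bits into a byte array. A rough, self-contained sketch of that interleaving idea for two dimensions (a hypothetical helper; the exact bit and byte ordering of asByteArray() above differs):

static byte[] interleave2D(double x, double y, double minX, double maxX, double minY, double maxY) {
  // Normalize to [0, 1] and scale to the non-negative long range.
  long lx = (long) ((x - minX) / (maxX - minX) * Long.MAX_VALUE);
  long ly = (long) ((y - minY) / (maxY - minY) * Long.MAX_VALUE);
  byte[] z = new byte[2 * (Long.SIZE / Byte.SIZE)]; // 2 dimensions * 8 bytes
  int bit = 0;
  for(int shift = Long.SIZE - 1; shift >= 0; shift--) { // most significant bits first
    for(long v : new long[] { lx, ly }) {
      if(((v >>> shift) & 1L) != 0) {
        z[bit >>> 3] |= 1 << (7 - (bit & 7));
      }
      bit++;
    }
  }
  return z; // big-endian z-value: byte-wise comparison yields the z-order
}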
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
new file mode 100644
index 00000000..1efe19dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
@@ -0,0 +1,499 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Beta Distribution with implementation of the regularized incomplete beta
+ * function
+ *
+ * @author Jan Brusis
+ * @author Erich Schubert
+ */
+public class BetaDistribution implements DistributionWithRandom {
+ /**
+ * Numerical precision to use
+ */
+ static final double NUM_PRECISION = 1E-15;
+
+ /**
+ * Limit of when to switch to quadrature method
+ */
+ static final double SWITCH = 3000;
+
+ /**
+ * Abscissas for Gauss-Legendre quadrature
+ */
+ static final double[] GAUSSLEGENDRE_Y = { 0.0021695375159141994, 0.011413521097787704, 0.027972308950302116, 0.051727015600492421, 0.082502225484340941, 0.12007019910960293, 0.16415283300752470, 0.21442376986779355, 0.27051082840644336, 0.33199876341447887, 0.39843234186401943, 0.46931971407375483, 0.54413605556657973, 0.62232745288031077, 0.70331500465597174, 0.78649910768313447, 0.87126389619061517, 0.95698180152629142 };
+
+ /**
+ * Weights for Gauss-Legendre quadrature
+ */
+ static final double[] GAUSSLEGENDRE_W = { 0.0055657196642445571, 0.012915947284065419, 0.020181515297735382, 0.027298621498568734, 0.034213810770299537, 0.040875750923643261, 0.047235083490265582, 0.053244713977759692, 0.058860144245324798, 0.064039797355015485, 0.068745323835736408, 0.072941885005653087, 0.076598410645870640, 0.079687828912071670, 0.082187266704339706, 0.084078218979661945, 0.085346685739338721, 0.085983275670394821 };
+
+ /**
+ * Shape parameter of beta distribution
+ */
+ private final double alpha;
+
+ /**
+ * Shape parameter of beta distribution
+ */
+ private final double beta;
+
+ /**
+ * For random number generation
+ */
+ private Random random;
+
+ /**
+ * Log beta(a, b) cache
+ */
+ private double logbab;
+
+ /**
+ * Constructor.
+ *
+ * @param a shape Parameter a
+ * @param b shape Parameter b
+ */
+ public BetaDistribution(double a, double b) {
+ this(a, b, new Random());
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param a shape Parameter a
+ * @param b shape Parameter b
+ * @param random Random generator
+ */
+ public BetaDistribution(double a, double b, Random random) {
+ super();
+ if(a <= 0.0 || b <= 0.0) {
+ throw new IllegalArgumentException("Invalid parameters for Beta distribution.");
+ }
+
+ this.alpha = a;
+ this.beta = b;
+ this.logbab = logBeta(a, b);
+ this.random = random;
+ }
+
+ @Override
+ public double pdf(double val) {
+ if(val < 0. || val > 1.) {
+ return 0.;
+ }
+ if(val == 0.) {
+ if(alpha > 1.) {
+ return 0.;
+ }
+ if(alpha < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return beta;
+ }
+ if(val == 1.) {
+ if(beta > 1.) {
+ return 0.;
+ }
+ if(beta < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return alpha;
+ }
+ return Math.exp(-logbab + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+ }
+
+ @Override
+ public double cdf(double x) {
+ if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ if(alpha > SWITCH && beta > SWITCH) {
+ return regularizedIncBetaQuadrature(alpha, beta, x);
+ }
+ double bt = Math.exp(-logbab + alpha * Math.log(x) + beta * Math.log1p(-x));
+ if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+ }
+ else {
+ return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+ }
+ }
+
+ @Override
+ public double quantile(double x) {
+ // Valid parameters
+ if(x < 0 || x > 1 || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x == 0) {
+ return 0.0;
+ }
+ if(x == 1) {
+ return 1.0;
+ }
+ // Simpler to compute inverse?
+ if(x > 0.5) {
+ return 1 - rawQuantile(1 - x, beta, alpha, logbab);
+ }
+ else {
+ return rawQuantile(x, alpha, beta, logbab);
+ }
+ }
+
+ @Override
+ public double nextRandom() {
+ double x = GammaDistribution.nextRandom(alpha, 1, random);
+ double y = GammaDistribution.nextRandom(beta, 1, random);
+ return x / (x + y);
+ }
+
+ @Override
+ public String toString() {
+ return "BetaDistribution(alpha=" + alpha + ", beta=" + beta + ")";
+ }
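+
+  /*
+   * Example (illustrative): the symmetric Beta(2, 2) distribution has its
+   * median at 0.5, so cdf and quantile invert each other there:
+   *
+   * BetaDistribution dist = new BetaDistribution(2.0, 2.0);
+   * double p = dist.cdf(0.5); // 0.5 by symmetry
+   * double x = dist.quantile(p); // 0.5 again
+   */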
+
+ /**
+ * Static version of the CDF of the beta distribution
+ *
+ * @param val Value
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return cumulative density
+ */
+ public static double cdf(double val, double alpha, double beta) {
+ return regularizedIncBeta(val, alpha, beta);
+ }
+
+ /**
+ * Static version of the PDF of the beta distribution
+ *
+ * @param val Value
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return probability density
+ */
+ public static double pdf(double val, double alpha, double beta) {
+ if(alpha <= 0. || beta <= 0. || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(val)) {
+ return Double.NaN;
+ }
+ if(val < 0. || val > 1.) {
+ return 0.;
+ }
+ if(val == 0.) {
+ if(alpha > 1.) {
+ return 0.;
+ }
+ if(alpha < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return beta;
+ }
+ if(val == 1.) {
+ if(beta > 1.) {
+ return 0.;
+ }
+ if(beta < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return alpha;
+ }
+ return Math.exp(-logBeta(alpha, beta) + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1));
+ }
+
+ /**
+ * Compute log beta(a,b)
+ *
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return Logarithm of result
+ */
+ public static double logBeta(double alpha, double beta) {
+ return GammaDistribution.logGamma(alpha) + GammaDistribution.logGamma(beta) - GammaDistribution.logGamma(alpha + beta);
+ }
+
+ /**
+ * Computes the regularized incomplete beta function I_x(a, b) which is also
+ * the CDF of the beta distribution. Based on the book "Numerical Recipes"
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return Value of the regularized incomplete beta function
+ */
+ public static double regularizedIncBeta(double x, double alpha, double beta) {
+ if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ return Double.NaN;
+ }
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ if(alpha > SWITCH && beta > SWITCH) {
+ return regularizedIncBetaQuadrature(alpha, beta, x);
+ }
+ double bt = Math.exp(-logBeta(alpha, beta) + alpha * Math.log(x) + beta * Math.log1p(-x));
+ if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
+ }
+ else {
+ return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
+ }
+ }
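+
+  // Sanity check: for alpha == beta the distribution is symmetric about 0.5,
+  // so regularizedIncBeta(0.5, a, a) evaluates to 0.5 for any a > 0.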
+
+ /**
+   * Returns the regularized incomplete beta function I_x(a, b). Includes the
+ * continued fraction way of computing, based on the book "Numerical Recipes".
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return result
+ */
+ protected static double regularizedIncBetaCF(double alpha, double beta, double x) {
+ final double FPMIN = Double.MIN_VALUE / NUM_PRECISION;
+ double qab = alpha + beta;
+ double qap = alpha + 1.0;
+ double qam = alpha - 1.0;
+ double c = 1.0;
+ double d = 1.0 - qab * x / qap;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ d = 1.0 / d;
+ double h = d;
+ for(int m = 1; m < 10000; m++) {
+ int m2 = 2 * m;
+ double aa = m * (beta - m) * x / ((qam + m2) * (alpha + m2));
+ d = 1.0 + aa * d;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ c = 1.0 + aa / c;
+ if(Math.abs(c) < FPMIN) {
+ c = FPMIN;
+ }
+ d = 1.0 / d;
+ h *= d * c;
+ aa = -(alpha + m) * (qab + m) * x / ((alpha + m2) * (qap + m2));
+ d = 1.0 + aa * d;
+ if(Math.abs(d) < FPMIN) {
+ d = FPMIN;
+ }
+ c = 1.0 + aa / c;
+ if(Math.abs(c) < FPMIN) {
+ c = FPMIN;
+ }
+ d = 1.0 / d;
+ double del = d * c;
+ h *= del;
+ if(Math.abs(del - 1.0) <= NUM_PRECISION) {
+ break;
+ }
+ }
+ return h;
+ }
+
+ /**
+ * Returns the regularized incomplete beta function I_x(a, b) by quadrature,
+ * based on the book "Numerical Recipes".
+ *
+ * @param alpha Parameter a
+ * @param beta Parameter b
+ * @param x Parameter x
+ * @return result
+ */
+ protected static double regularizedIncBetaQuadrature(double alpha, double beta, double x) {
+ double a1 = alpha - 1.0;
+ double b1 = beta - 1.0;
+ double mu = alpha / (alpha + beta);
+ double lnmu = Math.log(mu);
+ double lnmuc = Math.log1p(-mu);
+ double t = Math.sqrt(alpha * beta / ((alpha + beta) * (alpha + beta) * (alpha + beta + 1.0)));
+ double xu;
+ if(x > alpha / (alpha + beta)) {
+ if(x >= 1.0) {
+ return 1.0;
+ }
+ xu = Math.min(1.0, Math.max(mu + 10.0 * t, x + 5.0 * t));
+ }
+ else {
+ if(x <= 0.0) {
+ return 0.0;
+ }
+ xu = Math.max(0.0, Math.min(mu - 10.0 * t, x - 5.0 * t));
+ }
+ double sum = 0.0;
+ for(int i = 0; i < GAUSSLEGENDRE_Y.length; i++) {
+ t = x + (xu - x) * GAUSSLEGENDRE_Y[i];
+ sum += GAUSSLEGENDRE_W[i] * Math.exp(a1 * (Math.log(t) - lnmu) + b1 * (Math.log1p(-t) - lnmuc));
+ }
+    double ans = sum * (xu - x) * Math.exp(a1 * lnmu - GammaDistribution.logGamma(alpha) + b1 * lnmuc - GammaDistribution.logGamma(beta) + GammaDistribution.logGamma(alpha + beta));
+ return ans > 0 ? 1.0 - ans : -ans;
+ }
+
+ /**
+ * Compute quantile (inverse cdf) for Beta distributions.
+ *
+ * @param p Probability
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return Probit for Beta distribution
+ */
+ public static double quantile(double p, double alpha, double beta) {
+ // Valid parameters
+ if(Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(p) || alpha < 0. || beta < 0.) {
+ return Double.NaN;
+ }
+ if(p < 0 || p > 1) {
+ return Double.NaN;
+ }
+ if(p == 0) {
+ return 0.0;
+ }
+ if(p == 1) {
+ return 1.0;
+ }
+ // Simpler to compute inverse?
+ if(p > 0.5) {
+ return 1 - rawQuantile(1 - p, beta, alpha, logBeta(beta, alpha));
+ }
+ else {
+ return rawQuantile(p, alpha, beta, logBeta(alpha, beta));
+ }
+ }
+
+ /**
+ * Raw quantile function
+ *
+ * @param p P, must be 0 < p <= .5
+ * @param alpha Alpha
+ * @param beta Beta
+ * @param logbeta log Beta(alpha, beta)
+ * @return Position
+ */
+ protected static double rawQuantile(double p, double alpha, double beta, final double logbeta) {
+ // Initial estimate for x
+ double x;
+ {
+ // Very fast approximation of y.
+ double tmp = Math.sqrt(-2 * Math.log(p));
+ double y = tmp - (2.30753 + 0.27061 * tmp) / (1. + (0.99229 + 0.04481 * tmp) * tmp);
+
+ if(alpha > 1 && beta > 1) {
+ double r = (y * y - 3.) / 6.;
+ double s = 1. / (alpha + alpha - 1.);
+ double t = 1. / (beta + beta - 1.);
+ double h = 2. / (s + t);
+ double w = y * Math.sqrt(h + r) / h - (t - s) * (r + 5. / 6. - 2. / (3. * h));
+ x = alpha / (alpha + beta * Math.exp(w + w));
+ }
+ else {
+ double r = beta + beta;
+ double t = 1. / (9. * beta);
+ t = r * Math.pow(1. - t + y * Math.sqrt(t), 3.0);
+ if(t <= 0.) {
+ x = 1. - Math.exp((Math.log1p(-p) + Math.log(beta) + logbeta) / beta);
+ }
+ else {
+ t = (4. * alpha + r - 2.) / t;
+ if(t <= 1.) {
+ x = Math.exp((Math.log(p * alpha) + logbeta) / alpha);
+ }
+ else {
+ x = 1. - 2. / (t + 1.);
+ }
+ }
+ }
+ // Degenerate initial approximations
+ if(x < 3e-308 || x > 1 - 2.22e-16) {
+ x = 0.5;
+ }
+ }
+
+    // Newton-Raphson method using the CDF
+ {
+ final double ialpha = 1 - alpha;
+ final double ibeta = 1 - beta;
+
+ // Desired accuracy, as from GNU R adoption of AS 109
+ final double acu = Math.max(1e-300, Math.pow(10., -13 - 2.5 / (alpha * alpha) - .5 / (p * p)));
+ double prevstep = 0., y = 0., stepsize = 1;
+
+ for(int outer = 0; outer < 1000; outer++) {
+ // Current CDF value
+ double ynew = cdf(x, alpha, beta);
+ if(Double.isInfinite(ynew)) { // Degenerated.
+ return Double.NaN;
+ }
+ // Error gradient
+ ynew = (ynew - p) * Math.exp(logbeta + ialpha * Math.log(x) + ibeta * Math.log1p(-x));
+ if(ynew * y <= 0.) {
+ prevstep = Math.max(Math.abs(stepsize), 3e-308);
+ }
+ // Inner loop: try different step sizes: y * 3^-i
+ double g = 1, xnew = 0.;
+ for(int inner = 0; inner < 1000; inner++) {
+ stepsize = g * ynew;
+ if(Math.abs(stepsize) < prevstep) {
+ xnew = x - stepsize; // Candidate x
+ if(xnew >= 0. && xnew <= 1.) {
+ // Close enough
+ if(prevstep <= acu || Math.abs(ynew) <= acu) {
+ return x;
+ }
+ if(xnew != 0. && xnew != 1.) {
+ break;
+ }
+ }
+ }
+ g /= 3.;
+ }
+ // Convergence
+ if(Math.abs(xnew - x) < 1e-15 * x) {
+ return x;
+ }
+ // Iterate with new values
+ x = xnew;
+ y = ynew;
+ }
+ }
+ // Not converged in Newton-Raphson
+ throw new AbortException("Beta quantile computation did not converge.");
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
index c561c4cd..84c86e98 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
@@ -1,5 +1,7 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -27,8 +29,10 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
* Chi distribution.
*
* @author Erich Schubert
+ *
+ * @apiviz.composedOf ChiSquaredDistribution
*/
-public class ChiDistribution implements Distribution {
+public class ChiDistribution implements DistributionWithRandom {
/**
* Degrees of freedom. Usually integer.
*/
@@ -89,4 +93,15 @@ public class ChiDistribution implements Distribution {
public static double cdf(double val, double dof) {
return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2);
}
+
+ // FIXME: implement!
+ @Override
+ public double quantile(double val) {
+ throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ @Override
+ public String toString() {
+ return "ChiDistribution(dof=" + dof + ")";
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
index 0ab39c78..8555afd3 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
@@ -1,5 +1,7 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -35,7 +37,7 @@ public class ChiSquaredDistribution extends GammaDistribution {
* @param dof Degrees of freedom.
*/
public ChiSquaredDistribution(double dof) {
- super(.5 * dof, 2.0);
+ super(.5 * dof, .5);
}
/**
@@ -69,4 +71,23 @@ public class ChiSquaredDistribution extends GammaDistribution {
}
return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0;
}
+
+ /**
+ * Return the quantile function for this distribution
+ *
+ * Reference:
+ * <p>
+   * Algorithm AS 91: The percentage points of the $\chi^2$ distribution<br />
+ * D.J. Best, D. E. Roberts<br />
+ * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+ * </p>
+ *
+ * @param x Quantile
+ * @param dof Degrees of freedom
+ * @return quantile position
+ */
+ @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+ public static double quantile(double x, double dof) {
+ return GammaDistribution.quantile(x, .5 * dof, .5);
+ }
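+
+  // Example (illustrative): quantile(0.95, 1) is approximately 3.841, the
+  // usual 95% critical value of the chi-squared test with one degree of freedom.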
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
index 1f36dd4a..b046f0ef 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
@@ -28,7 +28,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*
* @author Erich Schubert
*/
-public class ConstantDistribution implements Distribution {
+public class ConstantDistribution implements DistributionWithRandom {
/**
* The constant
*/
@@ -58,4 +58,9 @@ public class ConstantDistribution implements Distribution {
public double cdf(double val) {
return (val >= c) ? 1.0 : 0.0;
}
+
+ @Override
+ public double quantile(double val) {
+ return c;
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
index 290e6434..5b6cd286 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
@@ -24,20 +24,14 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*/
/**
- * Interface for a simple distribution generator with a PDF, i.e. it can also
- * compute a density
+ * Statistical distributions, with their common functions. See
+ * {@link DistributionWithRandom} for distributions that also have a random
+ * generator included.
*
* @author Erich Schubert
*/
public interface Distribution {
/**
- * Generate a new random value
- *
- * @return new random value
- */
- public double nextRandom();
-
- /**
* Return the density of an existing value
*
* @param val existing value
@@ -54,6 +48,14 @@ public interface Distribution {
public double cdf(double val);
/**
+ * Quantile aka probit (for normal) aka inverse CDF (invcdf, cdf^-1) function.
+ *
+   * @param val Probability value to compute the quantile for
+ * @return Quantile position
+ */
+ public double quantile(double val);
+
+ /**
* Describe the distribution
*
* @return description
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
new file mode 100644
index 00000000..af272528
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
@@ -0,0 +1,37 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Distribution that also has support for generating random numbers.
+ *
+ * @author Erich Schubert
+ */
+public interface DistributionWithRandom extends Distribution {
+ /**
+ * Generate a new random value
+ *
+ * @return new random value
+ */
+ public double nextRandom();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
index 6830f25a..21eebc51 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
@@ -25,14 +25,21 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
* Gamma Distribution, with random generation and density functions.
*
* @author Erich Schubert
*/
-public class GammaDistribution implements Distribution {
+public class GammaDistribution implements DistributionWithRandom {
+ /**
+ * Euler–Mascheroni constant
+ */
+ public static final double EULERS_CONST = 0.5772156649015328606065120900824024;
+
/**
* LANCZOS-Coefficients for Gamma approximation.
*
@@ -102,6 +109,11 @@ public class GammaDistribution implements Distribution {
}
@Override
+ public double quantile(double val) {
+ return quantile(val, k, theta);
+ }
+
+ @Override
public double nextRandom() {
return nextRandom(k, theta, random);
}
@@ -113,7 +125,7 @@ public class GammaDistribution implements Distribution {
*/
@Override
public String toString() {
- return "Gamma Distribution (k=" + k + ", theta=" + theta + ")";
+ return "GammaDistribution(k=" + k + ", theta=" + theta + ")";
}
/**
@@ -139,7 +151,19 @@ public class GammaDistribution implements Distribution {
* @return cdf value
*/
public static double cdf(double val, double k, double theta) {
- return regularizedGammaP(k, val / theta);
+ return regularizedGammaP(k, val * theta);
+ }
+
+ /**
+ * The log CDF, static version.
+ *
+ * @param val Value
+ * @param k Shape k
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @return cdf value
+ */
+ public static double logcdf(double val, double k, double theta) {
+ return logregularizedGammaP(k, val * theta);
}
/**
@@ -170,6 +194,33 @@ public class GammaDistribution implements Distribution {
}
/**
+   * Gamma distribution log PDF (with {@code Double.NEGATIVE_INFINITY} for x &lt; 0)
+ *
+ * @param x query value
+ * @param k Alpha
+ * @param theta Theta = 1 / Beta
+   * @return log probability density
+ */
+ public static double logpdf(double x, double k, double theta) {
+ if(x < 0) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x == 0) {
+ if(k == 1.0) {
+ return Math.log(theta);
+ }
+ else {
+ return Double.NEGATIVE_INFINITY;
+ }
+ }
+ if(k == 1.0) {
+ return Math.log(theta) - x * theta;
+ }
+
+ return Math.log(theta) + (k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k);
+ }
+
+ /**
* Compute logGamma.
*
* Based loosely on "Numerical Recpies" and the work of Paul Godfrey at
@@ -195,6 +246,22 @@ public class GammaDistribution implements Distribution {
}
/**
+ * Compute the regular Gamma function.
+ *
+   * Note: for numerical reasons, it is preferable to use {@link #logGamma}
+ * when possible! In particular, this method just computes
+ * {@code Math.exp(logGamma(x))} anyway.
+ *
+ * Try to postpone the {@code Math.exp} call to preserve numeric range!
+ *
+ * @param x Position
+ * @return Gamma at this position
+ */
+ public static double gamma(double x) {
+ return Math.exp(logGamma(x));
+ }
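+
+  // Sanity checks: gamma(0.5) is approximately sqrt(pi) = 1.7724539, and
+  // gamma(n + 1) equals n! for non-negative integer n.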
+
+ /**
* Returns the regularized gamma function P(a, x).
*
* Includes the quadrature way of computing.
@@ -236,6 +303,49 @@ public class GammaDistribution implements Distribution {
}
/**
+ * Returns the regularized gamma function log P(a, x).
+ *
+ * Includes the quadrature way of computing.
+ *
+ * TODO: find "the" most accurate version of this. We seem to agree with
+ * others for the first 10+ digits, but diverge a bit later than that.
+ *
+ * @param a Parameter a
+ * @param x Parameter x
+ * @return Gamma value
+ */
+ public static double logregularizedGammaP(final double a, final double x) {
+ // Special cases
+ if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) {
+ return Double.NaN;
+ }
+ if(x == 0.0) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x >= a + 1) {
+ // Expected to converge faster
+ // FIXME: and in log?
+ return Math.log(1.0 - regularizedGammaQ(a, x));
+ }
+ // Loosely following "Numerical Recipes"
+ double del = 1.0 / a;
+ double sum = del;
+ for(int n = 1; n < Integer.MAX_VALUE; n++) {
+ // compute next element in the series
+ del *= x / (a + n);
+ sum = sum + del;
+ if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) {
+ break;
+ }
+ }
+ if(Double.isInfinite(sum)) {
+ return 0;
+ }
+ // TODO: reread numerical recipes, can we replace log(sum)?
+ return -x + (a * Math.log(x)) - logGamma(a) + Math.log(sum);
+ }
+
+ /**
* Returns the regularized gamma function Q(a, x) = 1 - P(a, x).
*
* Includes the continued fraction way of computing, based loosely on the book
@@ -313,7 +423,7 @@ public class GammaDistribution implements Distribution {
final double e1 = 1.000000000, e2 = 0.499999994, e3 = 0.166666848;
final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826;
final double e7 = 0.000247453;
-
+
if(k < 1.0) { // Base case, for small k
final double b = 1.0 + 0.36788794412 * k; // Step 1
while(true) {
@@ -360,11 +470,11 @@ public class GammaDistribution implements Distribution {
/* v2 = tv2; */
v12 = tv12;
}
-
+
// double b = 0.0, c = 0.0;
// double si = 0.0, q0 = 0.0;
final double b, c, si, q0;
-
+
// Simpler accept cases & parameter computation
{
final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12);
@@ -373,14 +483,14 @@ public class GammaDistribution implements Distribution {
if(t >= 0.0) {
return (gds / theta); // Immediate acceptance
}
-
+
// Random uniform
final double un = random.nextDouble();
// Squeeze acceptance
if(d * un <= t * t * t) {
return (gds / theta);
}
-
+
if(k != -1.0) { // Step 4. Set-up for hat case
final double r = 1.0 / k;
q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r;
@@ -425,7 +535,7 @@ public class GammaDistribution implements Distribution {
}
}
}
-
+
// Double exponential deviate t
while(true) {
double e, u, sign_u, t;
@@ -438,7 +548,7 @@ public class GammaDistribution implements Distribution {
t = b + (e * si) * sign_u;
}
while(t <= -0.71874483771719);
-
+
// New v(t) and q(t)
final double v = t / (s + s);
final double q;
@@ -467,4 +577,370 @@ public class GammaDistribution implements Distribution {
}
}
}
+
+ /**
+ * Approximate probit for chi squared distribution
+ *
+ * Based on first half of algorithm AS 91
+ *
+ * Reference:
+ * <p>
+   * Algorithm AS 91: The percentage points of the $\chi^2$ distribution<br />
+ * D.J. Best, D. E. Roberts<br />
+ * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+ * </p>
+ *
+   * @param p Probability
+   * @param nu Shape parameter for Chi, nu = 2 * k
+   * @param g logGamma(nu / 2), i.e. logGamma(k)
+ * @return Probit for chi squared
+ */
+ @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+ protected static double chisquaredProbitApproximation(final double p, double nu, double g) {
+ final double EPS1 = 1e-2; // Approximation quality
+ // Sanity checks
+ if(Double.isNaN(p) || Double.isNaN(nu)) {
+ return Double.NaN;
+ }
+ // Range check
+ if(p <= 0) {
+ return 0;
+ }
+ if(p >= 1) {
+ return Double.POSITIVE_INFINITY;
+ }
+ // Invalid parameters
+ if(nu <= 0) {
+ return Double.NaN;
+ }
+ // Shape of gamma distribution, "XX" in AS 91
+ final double k = 0.5 * nu;
+
+ // For small chi squared values - AS 91
+ final double logp = Math.log(p);
+ if(nu < -1.24 * logp) {
+ // FIXME: implement and use logGammap1 instead - more stable?
+ //
+ // final double lgam1pa = (alpha < 0.5) ? logGammap1(alpha) :
+ // (Math.log(alpha) + g);
+ // return Math.exp((lgam1pa + logp) / alpha + MathUtil.LOG2);
+ // This is literal AS 91, above is the GNU R variant.
+ return Math.pow(p * k * Math.exp(g + k * MathUtil.LOG2), 1 / k);
+ }
+ else if(nu > 0.32) {
+ // Wilson and Hilferty estimate: - AS 91 at 3
+ final double x = NormalDistribution.quantile(p, 0, 1);
+ final double p1 = 2. / (9. * nu);
+ double ch = nu * Math.pow(x * Math.sqrt(p1) + 1 - p1, 3);
+
+ // Better approximation for p tending to 1:
+ if(ch > 2.2 * nu + 6) {
+ ch = -2 * (Math.log1p(-p) - (k - 1) * Math.log(0.5 * ch) + g);
+ }
+ return ch;
+ }
+ else {
+ // nu <= 0.32, AS 91 at 1
+ final double C7 = 4.67, C8 = 6.66, C9 = 6.73, C10 = 13.32;
+ final double ag = Math.log1p(-p) + g + (k - 1) * MathUtil.LOG2;
+ double ch = 0.4;
+ while(true) {
+ final double p1 = 1 + ch * (C7 + ch);
+ final double p2 = ch * (C9 + ch * (C8 + ch));
+ final double t = -0.5 + (C7 + 2 * ch) / p1 - (C9 + ch * (C10 + 3 * ch)) / p2;
+ final double delta = (1 - Math.exp(ag + 0.5 * ch) * p2 / p1) / t;
+ ch -= delta;
+        // Converged: correction is small relative to ch
+        if(Math.abs(delta) <= EPS1 * Math.abs(ch)) {
+ return ch;
+ }
+ }
+ }
+ }
+
+ /**
+ * Compute probit (inverse cdf) for Gamma distributions.
+ *
+ * Based on algorithm AS 91:
+ *
+ * Reference:
+ * <p>
+   * Algorithm AS 91: The percentage points of the $\chi^2$ distribution<br />
+ * D.J. Best, D. E. Roberts<br />
+ * Journal of the Royal Statistical Society. Series C (Applied Statistics)
+ * </p>
+ *
+ * @param p Probability
+ * @param k k, alpha aka. "shape" parameter
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @return Probit for Gamma distribution
+ */
+  @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)")
+ public static double quantile(double p, double k, double theta) {
+ final double EPS2 = 5e-7; // final precision of AS 91
+ final int MAXIT = 1000;
+
+ // Avoid degenerates
+ if(Double.isNaN(p) || Double.isNaN(k) || Double.isNaN(theta)) {
+ return Double.NaN;
+ }
+ // Range check
+ if(p <= 0) {
+ return 0;
+ }
+ if(p >= 1) {
+ return Double.POSITIVE_INFINITY;
+ }
+ // Shape parameter check
+ if(k < 0 || theta <= 0) {
+ return Double.NaN;
+ }
+ // Corner case - all at 0
+ if(k == 0) {
+ return 0.;
+ }
+
+ int max_newton_iterations = 1;
+ // For small values, ensure some refinement iterations
+ if(k < 1e-10) {
+ max_newton_iterations = 7;
+ }
+
+ final double g = logGamma(k); // == logGamma(v/2)
+
+ // Phase I, an initial rough approximation
+ // First half of AS 91
+ double ch = chisquaredProbitApproximation(p, 2 * k, g);
+    // Second half of AS 91 follows:
+ // Refine ChiSquared approximation
+ chisq: {
+ if(Double.isInfinite(ch)) {
+ // Cannot refine infinity
+ max_newton_iterations = 0;
+ break chisq;
+ }
+ if(ch < EPS2) {
+ // Do not iterate, but refine with newton method
+ max_newton_iterations = 20;
+ break chisq;
+ }
+ if(p > 1 - 1e-14 || p < 1e-100) {
+ // Not in appropriate value range for AS 91
+ max_newton_iterations = 20;
+ break chisq;
+ }
+
+ // Phase II: Iteration
+ final double c = k - 1;
+ final double ch0 = ch; // backup initial approximation
+ for(int i = 1; i <= MAXIT; i++) {
+ final double q = ch; // previous approximation
+ final double p1 = 0.5 * ch;
+ final double p2 = p - regularizedGammaP(k, p1);
+ if(Double.isInfinite(p2) || ch <= 0) {
+ ch = ch0;
+ max_newton_iterations = 27;
+ break chisq;
+ }
+ { // Taylor series of AS 91: iteration via "goto 4"
+ final double t = p2 * Math.exp(k * MathUtil.LOG2 + g + p1 - c * Math.log(ch));
+ final double b = t / ch;
+ final double a = 0.5 * t - b * c;
+ final double s1 = (210. + a * (140. + a * (105. + a * (84. + a * (70. + 60. * a))))) / 420.;
+ final double s2 = (420. + a * (735. + a * (966. + a * (1141. + 1278 * a)))) / 2520.;
+ final double s3 = (210. + a * (462. + a * (707. + 932. * a))) / 2520.;
+ final double s4 = (252. + a * (672. + 1182. * a) + c * (294. + a * (889. + 1740. * a))) / 5040.;
+ final double s5 = (84. + 2264. * a + c * (1175. + 606. * a)) / 2520.;
+ final double s6 = (120. + c * (346. + 127. * c)) / 5040.;
+ ch += t * (1 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6))))));
+ }
+ if(Math.abs(q - ch) < EPS2 * ch) {
+ break chisq;
+ }
+ // Divergence treatment, from GNU R
+ if(Math.abs(q - ch) > 0.1 * Math.abs(ch)) {
+ ch = ((ch < q) ? 0.9 : 1.1) * q;
+ }
+ }
+ LoggingUtil.warning("No convergence in AS 91 Gamma probit.");
+ // no convergence in MAXIT iterations -- but we add Newton now...
+ }
+ double x = 0.5 * ch / theta;
+ if(max_newton_iterations > 0) {
+ // Refine result using final Newton steps.
+ // TODO: add unit tests that show an improvement! Maybe in logscale only?
+ x = gammaQuantileNewtonRefinement(Math.log(p), k, theta, max_newton_iterations, x);
+ }
+ return x;
+ }
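+
+  // Example (illustrative): quantile(0.5, 1, 1) is the median of the standard
+  // exponential distribution, i.e. Math.log(2), approximately 0.6931.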
+
+ /**
+   * Refinement of the Gamma quantile using Newton iterations.
+ *
+ * A trick used by GNU R to improve precision.
+ *
+ * @param logpt Target value of log p
+ * @param k Alpha
+ * @param theta Theta = 1 / Beta
+ * @param maxit Maximum number of iterations to do
+ * @param x Initial estimate
+ * @return Refined value
+ */
+ protected static double gammaQuantileNewtonRefinement(final double logpt, final double k, final double theta, final int maxit, double x) {
+ final double EPS_N = 1e-15; // Precision threshold
+ // 0 is not possible, try MIN_NORMAL instead
+ if(x <= 0) {
+ x = Double.MIN_NORMAL;
+ }
+ // Current estimation
+ double logpc = logcdf(x, k, theta);
+ if(x == Double.MIN_NORMAL && logpc > logpt * (1. + 1e-7)) {
+ return 0.;
+ }
+ if(logpc == Double.NEGATIVE_INFINITY) {
+ return 0.;
+ }
+ // Refine by newton iterations
+ for(int i = 0; i < maxit; i++) {
+ // Error of current approximation
+ final double logpe = logpc - logpt;
+ if(Math.abs(logpe) < Math.abs(EPS_N * logpt)) {
+ break;
+ }
+ // Step size is controlled by PDF:
+ final double g = logpdf(x, k, theta);
+ if(g == Double.NEGATIVE_INFINITY) {
+ break;
+ }
+ final double newx = x - logpe * Math.exp(logpc - g);
+ // New estimate:
+ logpc = logcdf(newx, k, theta);
+ if(Math.abs(logpc - logpt) > Math.abs(logpe) || (i > 0 && Math.abs(logpc - logpt) == Math.abs(logpe))) {
+ // no further improvement
+ break;
+ }
+ x = newx;
+ }
+ return x;
+ }
+
+ /**
+ * Compute the Psi / Digamma function
+ *
+ * Reference:
+ * <p>
+   * J. M. Bernardo<br />
+ * Algorithm AS 103: Psi (Digamma) Function<br />
+ * Statistical Algorithms
+ * </p>
+ *
+ * TODO: is there a more accurate version maybe in R?
+ *
+ * @param x Position
+ * @return digamma value
+ */
+  @Reference(authors = "J. M. Bernardo", title = "Algorithm AS 103: Psi (Digamma) Function", booktitle = "Statistical Algorithms")
+ public static double digamma(double x) {
+ if(!(x > 0)) {
+ return Double.NaN;
+ }
+ // Method of equation 5:
+ if(x <= 1e-5) {
+ return -EULERS_CONST - 1. / x;
+ }
+ // Method of equation 4:
+ else if(x > 49.) {
+ final double ix2 = 1. / (x * x);
+ // Partial series expansion
+ return Math.log(x) - 0.5 / x - ix2 * ((1.0 / 12.) + ix2 * (1.0 / 120. - ix2 / 252.));
+ // + O(x^8) error
+ }
+ else {
+      // Shift x upwards using the recurrence digamma(x) = digamma(x + 1) - 1 / x
+ return digamma(x + 1.) - 1. / x;
+ }
+ }
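+
+  // Sanity check: digamma(1) == -EULERS_CONST, approximately -0.5772157.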
+
+ /**
+ * Compute the Trigamma function. Based on digamma.
+ *
+ * TODO: is there a more accurate version maybe in R?
+ *
+ * @param x Position
+ * @return trigamma value
+ */
+ public static double trigamma(double x) {
+ if(!(x > 0)) {
+ return Double.NaN;
+ }
+ // Method of equation 5:
+ if(x <= 1e-5) {
+ return 1. / (x * x);
+ }
+ // Method of equation 4:
+ else if(x > 49.) {
+ final double ix2 = 1. / (x * x);
+ // Partial series expansion
+ return 1 / x - ix2 / 2. + ix2 / x * (1.0 / 6. - ix2 * (1.0 / 30. + ix2 / 42.));
+ // + O(x^8) error
+ }
+ else {
+      // Shift x upwards using the recurrence trigamma(x) = trigamma(x + 1) + 1 / x^2
+      return trigamma(x + 1.) + 1. / (x * x);
+ }
+ }
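+
+  // Sanity check: trigamma(1) == pi^2 / 6, approximately 1.6449341.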
+
+ /**
+   * Maximum likelihood estimation of the Gamma distribution from a set of
+   * observations.
+ *
+ * @param data Data
+ * @return Assumed distribution
+ */
+ public static GammaDistribution estimate(double[] data) {
+ return estimate(data, data.length);
+ }
+
+ /**
+   * Maximum likelihood estimation of the Gamma distribution from a set of
+   * observations.
+ *
+ * Reference:
+ * <p>
+ * Maximum likelihood estimation of the parameters of the gamma distribution
+ * and their bias<br />
+ * S. C. Choi, R. Wette<br />
+ * in: Technometrics
+ * </p>
+ *
+ * @param data Data
+ * @param len Length of array
+ * @return Assumed distribution
+ */
+ @Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892")
+ public static GammaDistribution estimate(double[] data, int len) {
+ double meanx = 0, meanlogx = 0;
+ for(int i = 0; i < len; i++) {
+ final double logx = Math.log(data[i]);
+ final double deltax = data[i] - meanx;
+ final double deltalogx = logx - meanlogx;
+ meanx += deltax / (i + 1.);
+ meanlogx += deltalogx / (i + 1.);
+ }
+ // Initial approximation
+ final double logmeanx = Math.log(meanx);
+ final double diff = logmeanx - meanlogx;
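+    // Closed-form starting value: approximate root of log(k) - digamma(k) = diff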
+ double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff);
+
+ // Refine via newton iteration, based on Choi and Wette equation
+ while(true) {
+ double kdelta = (Math.log(k) - digamma(k) - diff) / (1 / k - trigamma(k));
+ if(Math.abs(kdelta) < 1E-8 || Double.isNaN(kdelta)) {
+ break;
+ }
+      k -= kdelta; // Newton step: k - f(k) / f'(k)
+ }
+ // Estimate theta:
+ final double theta = k / meanx;
+ return new GammaDistribution(k, theta);
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
index 919cc2e3..9180b59e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
*
* @author Erich Schubert
*/
-public class NormalDistribution implements Distribution {
+public class NormalDistribution implements DistributionWithRandom {
/**
* Coefficients for erf approximation.
*
@@ -148,20 +148,25 @@ public class NormalDistribution implements Distribution {
public double pdf(double val) {
return pdf(val, mean, stddev);
}
-
+
@Override
public double cdf(double val) {
return cdf(val, mean, stddev);
}
@Override
+ public double quantile(double q) {
+ return quantile(q, mean, stddev);
+ }
+
+ @Override
public double nextRandom() {
return mean + random.nextGaussian() * stddev;
}
@Override
public String toString() {
- return "Normal Distribution (mean="+mean+", stddev="+stddev+")";
+ return "NormalDistribution(mean=" + mean + ", stddev=" + stddev + ")";
}
/**
@@ -195,7 +200,7 @@ public class NormalDistribution implements Distribution {
if(Double.isInfinite(x)) {
return (x < 0.0) ? 2 : 0;
}
-
+
double result = Double.NaN;
double absx = Math.abs(x);
// First approximation interval
@@ -249,7 +254,7 @@ public class NormalDistribution implements Distribution {
* @return erfinv(x)
*/
public static double erfinv(double x) {
- return standardNormalProbit(0.5 * (x + 1)) / MathUtil.SQRT2;
+ return standardNormalQuantile(0.5 * (x + 1)) / MathUtil.SQRT2;
}
/**
@@ -261,10 +266,13 @@ public class NormalDistribution implements Distribution {
* by Peter John Acklam
* </p>
*
+ * FIXME: precision of this seems to be rather low, compared to our other
+ * functions. Only about 8-9 digits agree with SciPy/GNU R.
+ *
* @param d Quantile. Must be in [0:1], obviously.
* @return Inverse erf.
*/
- public static double standardNormalProbit(double d) {
+ public static double standardNormalQuantile(double d) {
if(d == 0) {
return Double.NEGATIVE_INFINITY;
}
@@ -319,7 +327,7 @@ public class NormalDistribution implements Distribution {
* @return The CDF of the normal given distribution at x.
*/
public static double cdf(double x, double mu, double sigma) {
- return (1 + erf(x / Math.sqrt(2))) / 2;
+ return .5 * (1 + erf((x - mu) / (MathUtil.SQRT2 * sigma)));
}
/**
@@ -331,7 +339,7 @@ public class NormalDistribution implements Distribution {
* @param sigma Standard deviation.
* @return The probit of the normal given distribution at x.
*/
- public static double probit(double x, double mu, double sigma) {
- return mu + sigma * standardNormalProbit(x);
+ public static double quantile(double x, double mu, double sigma) {
+ return mu + sigma * standardNormalQuantile(x);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
new file mode 100644
index 00000000..53fb0dc8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
@@ -0,0 +1,411 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * INCOMPLETE implementation of the poisson distribution.
+ *
+ * TODO: continue implementing, CDF, invcdf and nextRandom are missing
+ *
+ * References:
+ * <p>
+ * Catherine Loader<br />
+ * Fast and Accurate Computation of Binomial Probabilities.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+public class PoissonDistribution implements Distribution {
+ /**
+ * Number of tries
+ */
+ private int n;
+
+ /**
+ * Success probability
+ */
+ private double p;
+
+ /** Stirling error constants: 1./12 */
+ private final static double S0 = 0.08333333333333333333333d;
+
+ /** Stirling error constants: 1./360 */
+ private final static double S1 = 0.0027777777777777777777777777778d;
+
+ /** Stirling error constants: 1./1260 */
+ private final static double S2 = 0.00079365079365079365079365d;
+
+ /** Stirling error constants: 1./1680 */
+ private final static double S3 = 0.000595238095238095238095238095d;
+
+ /** Stirling error constants: 1./1188 */
+ private final static double S4 = 0.00084175084175084175084175084175d;
+
+ /**
+ * Exact table values for n <= 15 in steps of 0.5
+ *
+ * sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) )
+ */
+ private final static double STIRLING_EXACT_ERROR[] = {//
+ 0.0, // 0.0
+ 0.1534264097200273452913848, // 0.5
+ 0.0810614667953272582196702, // 1.0
+ 0.0548141210519176538961390, // 1.5
+ 0.0413406959554092940938221, // 2.0
+ 0.03316287351993628748511048, // 2.5
+ 0.02767792568499833914878929, // 3.0
+ 0.02374616365629749597132920, // 3.5
+ 0.02079067210376509311152277, // 4.0
+ 0.01848845053267318523077934, // 4.5
+ 0.01664469118982119216319487, // 5.0
+ 0.01513497322191737887351255, // 5.5
+ 0.01387612882307074799874573, // 6.0
+ 0.01281046524292022692424986, // 6.5
+ 0.01189670994589177009505572, // 7.0
+ 0.01110455975820691732662991, // 7.5
+ 0.010411265261972096497478567, // 8.0
+ 0.009799416126158803298389475, // 8.5
+ 0.009255462182712732917728637, // 9.0
+ 0.008768700134139385462952823, // 9.5
+ 0.008330563433362871256469318, // 10.0
+ 0.007934114564314020547248100, // 10.5
+ 0.007573675487951840794972024, // 11.0
+ 0.007244554301320383179543912, // 11.5
+ 0.006942840107209529865664152, // 12.0
+ 0.006665247032707682442354394, // 12.5
+ 0.006408994188004207068439631, // 13.0
+ 0.006171712263039457647532867, // 13.5
+ 0.005951370112758847735624416, // 14.0
+ 0.005746216513010115682023589, // 14.5
+ 0.0055547335519628013710386900 // 15.0
+ };
+
+ /**
+ * Constructor.
+ *
+ * Private: API not yet completely implemented!
+ *
+ * @param n Number of tries
+ * @param p Success probability
+ */
+ public PoissonDistribution(int n, double p) {
+ super();
+ this.n = n;
+ this.p = p;
+ }
+
+ /**
+ * Poisson PMF for integer values.
+ *
+ * @param x integer values
+ * @return Probability
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ public double pmf(int x) {
+ // Invalid values
+ if(x < 0 || x > n) {
+ return 0.0;
+ }
+ // Extreme probabilities
+ if(p <= 0d) {
+ return x == 0 ? 1.0 : 0.0;
+ }
+ if(p >= 1d) {
+ return x == n ? 1.0 : 0.0;
+ }
+ // Extreme values of x
+ if(x == 0) {
+ if(p < 0.1) {
+ return Math.exp(-devianceTerm(n, n * (1.0 - p)) - n * p);
+ }
+ else {
+ return Math.exp(n * Math.log(1.0 - p));
+ }
+ }
+ if(x == n) {
+ if(p > 0.9) {
+ return Math.exp(-devianceTerm(n, n * p) - n * (1 - p));
+ }
+ else {
+ return Math.exp(n * Math.log(p));
+ }
+ }
+
+ final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * (1.0 - p));
+ final double f = (MathUtil.TWOPI * x * (n - x)) / n;
+ return Math.exp(lc) / Math.sqrt(f);
+ }
+
+ @Override
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ public double pdf(double x) {
+ // Invalid values
+ if(x < 0 || x > n) {
+ return 0.0;
+ }
+ // Extreme probabilities
+ if(p <= 0d) {
+ return x == 0 ? 1.0 : 0.0;
+ }
+ if(p >= 1d) {
+ return x == n ? 1.0 : 0.0;
+ }
+ final double q = 1 - p;
+ // FIXME: check for x to be integer, return 0 otherwise?
+
+ // Extreme values of x
+ if(x == 0) {
+ if(p < 0.1) {
+ return Math.exp(-devianceTerm(n, n * q) - n * p);
+ }
+ else {
+ return Math.exp(n * Math.log(q));
+ }
+ }
+ if(x == n) {
+ if(p > 0.9) {
+ return Math.exp(-devianceTerm(n, n * p) - n * q);
+ }
+ else {
+ return Math.exp(n * Math.log(p));
+ }
+ }
+ final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * q);
+ final double f = (MathUtil.TWOPI * x * (n - x)) / n;
+ return Math.exp(lc) / Math.sqrt(f);
+ }
+
+ // FIXME: implement!
+ @Override
+ public double cdf(double val) {
+ throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ // FIXME: implement!
+ @Override
+ public double quantile(double val) {
+ throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ /**
+ * Compute the poisson distribution PDF with an offset of + 1
+ *
+ * pdf(x_plus_1 - 1, lambda)
+ *
+ * @param x_plus_1 x+1
+ * @param lambda Lambda
+ * @return pdf
+ */
+ public static double poissonPDFm1(double x_plus_1, double lambda) {
+ if(Double.isInfinite(lambda)) {
+ return 0.;
+ }
+ if(x_plus_1 > 1) {
+ return rawProbability(x_plus_1 - 1, lambda);
+ }
+ if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+ return Math.exp(-lambda - GammaDistribution.logGamma(x_plus_1));
+ }
+ else {
+ return rawProbability(x_plus_1, lambda) * (x_plus_1 / lambda);
+ }
+ }
+
+ /**
+ * Compute the poisson distribution PDF with an offset of + 1
+ *
+ * log pdf(x_plus_1 - 1, lambda)
+ *
+ * @param x_plus_1 x+1
+ * @param lambda Lambda
+   * @return log pdf
+ */
+ public static double logpoissonPDFm1(double x_plus_1, double lambda) {
+ if(Double.isInfinite(lambda)) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x_plus_1 > 1) {
+ return rawLogProbability(x_plus_1 - 1, lambda);
+ }
+ if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+ return -lambda - GammaDistribution.logGamma(x_plus_1);
+ }
+ else {
+ return rawLogProbability(x_plus_1, lambda) + Math.log(x_plus_1 / lambda);
+ }
+ }
+
+ /**
+   * Calculates the Stirling error
+ *
+ * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
+ *
+ * @param n Parameter n
+ * @return Stirling error
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ private static double stirlingError(int n) {
+ // Try to use a table value:
+ if(n < 16) {
+ return STIRLING_EXACT_ERROR[n * 2];
+ }
+ final double nn = n * n;
+ // Use the appropriate number of terms
+ if(n > 500) {
+ return (S0 - S1 / nn) / n;
+ }
+ if(n > 80) {
+ return ((S0 - (S1 - S2 / nn)) / nn) / n;
+ }
+ if(n > 35) {
+ return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
+ }
+ return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
+ }
+
+ /**
+   * Calculates the Stirling error
+ *
+ * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
+ *
+ * @param n Parameter n
+ * @return Stirling error
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ private static double stirlingError(double n) {
+ if(n < 16.0) {
+ // Our table has a step size of 0.5
+ final double n2 = 2.0 * n;
+ if(Math.floor(n2) == n2) { // Exact match
+ return STIRLING_EXACT_ERROR[(int) n2];
+ }
+ else {
+ return GammaDistribution.logGamma(n + 1.0) - (n + 0.5) * Math.log(n) + n - MathUtil.LOGSQRTTWOPI;
+ }
+ }
+ final double nn = n * n;
+ if(n > 500.0) {
+ return (S0 - S1 / nn) / n;
+ }
+ if(n > 80.0) {
+ return ((S0 - (S1 - S2 / nn)) / nn) / n;
+ }
+ if(n > 35.0) {
+ return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
+ }
+ return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
+ }
+
+ /**
+ * Evaluate the deviance term of the saddle point approximation.
+ *
+ * bd0(x,np) = x*ln(x/np)+np-x
+ *
+ * @param x probability density function position
+ * @param np product of trials and success probability: n*p
+ * @return Deviance term
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ private static double devianceTerm(double x, double np) {
+ if(Math.abs(x - np) < 0.1 * (x + np)) {
+ final double v = (x - np) / (x + np);
+
+ double s = (x - np) * v;
+ double ej = 2.0d * x * v;
+ for(int j = 1;; j++) {
+ ej *= v * v;
+ final double s1 = s + ej / (2 * j + 1);
+ if(s1 == s) {
+ return s1;
+ }
+ s = s1;
+ }
+ }
+ return x * Math.log(x / np) + np - x;
+ }
+
+ /**
+ * Poisson distribution probability, but also for non-integer arguments.
+ *
+ * lb^x exp(-lb) / x!
+ *
+ * @param x X
+ * @param lambda lambda
+ * @return Poisson distribution probability
+ */
+ public static double rawProbability(double x, double lambda) {
+ // Extreme lambda
+ if(lambda == 0) {
+ return ((x == 0) ? 1. : 0.);
+ }
+ // Extreme values
+ if(Double.isInfinite(lambda) || x < 0) {
+ return 0.;
+ }
+ if(x <= lambda * Double.MIN_NORMAL) {
+ return Math.exp(-lambda);
+ }
+ if(lambda < x * Double.MIN_NORMAL) {
+ double r = -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
+ return Math.exp(r);
+ }
+ final double f = MathUtil.TWOPI * x;
+ final double y = -stirlingError(x) - devianceTerm(x, lambda);
+ return Math.exp(y) / Math.sqrt(f);
+ }
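+
+  // Sanity check: rawProbability(0, lambda) == Math.exp(-lambda), the Poisson
+  // probability of observing zero events.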
+
+ /**
+   * Poisson distribution log probability, but also for non-integer arguments.
+   *
+   * log( lb^x exp(-lb) / x! )
+ *
+ * @param x X
+ * @param lambda lambda
+   * @return Poisson distribution log probability
+ */
+ public static double rawLogProbability(double x, double lambda) {
+ // Extreme lambda
+ if(lambda == 0) {
+      return ((x == 0) ? 0. : Double.NEGATIVE_INFINITY);
+ }
+ // Extreme values
+ if(Double.isInfinite(lambda) || x < 0) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(x <= lambda * Double.MIN_NORMAL) {
+ return -lambda;
+ }
+ if(lambda < x * Double.MIN_NORMAL) {
+ return -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
+ }
+ final double f = MathUtil.TWOPI * x;
+ final double y = -stirlingError(x) - devianceTerm(x, lambda);
+ return -0.5 * Math.log(f) + y;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
new file mode 100644
index 00000000..2e9e0d15
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
@@ -0,0 +1,90 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/**
+ * Student's t distribution.
+ *
+ * FIXME: add quantile function!
+ *
+ * @author Jan Brusis
+ */
+public class StudentsTDistribution implements Distribution {
+ /**
+ * Degrees of freedom
+ */
+ private final int v;
+
+ /**
+ * Constructor.
+ *
+ * @param v Degrees of freedom
+ */
+ public StudentsTDistribution(int v) {
+ this.v = v;
+ }
+
+ @Override
+ public double pdf(double val) {
+ return pdf(val, v);
+ }
+
+ @Override
+ public double cdf(double val) {
+ return cdf(val, v);
+ }
+
+ // FIXME: implement!
+ @Override
+ public double quantile(double val) {
+ throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ /**
+ * Static version of the t distribution's PDF.
+ *
+ * @param val value to evaluate
+ * @param v degrees of freedom
+ * @return f(val,v)
+ */
+ public static double pdf(double val, int v) {
+ // TODO: improve precision by computing "exp" last?
+    return Math.exp(GammaDistribution.logGamma((v + 1) * .5) - GammaDistribution.logGamma(v * .5)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) * .5));
+ }
+
+ /**
+ * Static version of the CDF of the t-distribution for t > 0
+ *
+ * @param val value to evaluate
+ * @param v degrees of freedom
+ * @return F(val, v)
+ */
+ public static double cdf(double val, int v) {
+ double x = v / (val * val + v);
+    return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v * .5, 0.5));
+ }
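+
+  // Note: for t < 0 use the symmetry of the t distribution, F(t) = 1 - F(-t);
+  // cdf(0, v) evaluates to 0.5 as expected.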
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
index 9571cfd3..4f54fbf9 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
@@ -30,7 +30,7 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class UniformDistribution implements Distribution {
+public class UniformDistribution implements DistributionWithRandom {
/**
* Minimum
*/
@@ -100,20 +100,20 @@ public class UniformDistribution implements Distribution {
}
return (val - min) / len;
}
+
+ @Override
+ public double quantile(double val) {
+ return min + len * val;
+ }
@Override
public double nextRandom() {
return min + random.nextDouble() * len;
}
- /**
- * Simple toString explaining the distribution parameters.
- *
- * Used in describing cluster models.
- */
@Override
public String toString() {
- return "Uniform Distribution (min=" + min + ", max=" + max + ")";
+ return "UniformDistribution(min=" + min + ", max=" + max + ")";
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java
new file mode 100644
index 00000000..f50f469f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
+
+/**
+ * Interface for the statistical test used by HiCS.
+ *
+ * Provides a single method that calculates the deviation between two data
+ * samples, given as arrays of double values.
+ *
+ * @author Jan Brusis
+ * @author Erich Schubert
+ */
+public interface GoodnessOfFitTest extends Parameterizable {
+ /**
+ * Measure the deviation of a full sample from a conditional sample.
+ *
+ * Sample arrays *may* be modified, e.g. sorted, by the test.
+ *
+ * @param fullSample Full sample
+ * @param conditionalSample Conditional sample
+ *
+ * @return Deviation
+ */
+ public double deviation(double[] fullSample, double[] conditionalSample);
+}
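Both tests added later in this change set implement this interface, so callers such as HiCS can swap them freely. A minimal sketch with illustrative data:

  // Sketch only: the implementations below are interchangeable behind this interface.
  GoodnessOfFitTest test = KolmogorovSmirnovTest.STATIC; // or WelchTTest.STATIC
  double[] full = { 1.0, 2.0, 3.0, 4.0, 5.0 };
  double[] cond = { 1.5, 2.5, 3.5 };
  double dev = test.deviation(full, cond); // note: the arrays may be sorted in place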
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java
new file mode 100644
index 00000000..236f26b8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java
@@ -0,0 +1,117 @@
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Kolmogorov-Smirnov test.
+ *
+ * Tests whether two given real-valued data samples could have originated from
+ * the same underlying distribution, using the Kolmogorov-Smirnov test
+ * statistic: the maximum absolute difference between the two empirical
+ * cumulative distribution functions.
+ *
+ * @author Erich Schubert
+ */
+public class KolmogorovSmirnovTest implements GoodnessOfFitTest {
+ /**
+ * Static instance
+ */
+ public static KolmogorovSmirnovTest STATIC = new KolmogorovSmirnovTest();
+
+ /**
+ * Constructor.
+ */
+ public KolmogorovSmirnovTest() {
+ super();
+ }
+
+ @Override
+ public double deviation(double[] fullSample, double[] conditionalSample) {
+ Arrays.sort(fullSample);
+ Arrays.sort(conditionalSample);
+ return calculateTestStatistic(fullSample, conditionalSample);
+ }
+
+ /**
+ * Calculates the maximum distance between the empirical CDFs of two data
+ * samples. Both sample arrays must be sorted in ascending order.
+ *
+ * @param sample1 first data sample positions (sorted)
+ * @param sample2 second data sample positions (sorted)
+ * @return the largest distance between the two empirical CDFs
+ */
+ public static double calculateTestStatistic(double[] sample1, double[] sample2) {
+ double maximum = 0.0;
+
+ int index1 = 0, index2 = 0;
+ double cdf1 = 0, cdf2 = 0;
+
+ // Parallel iteration over both curves. We can stop once either end is
+ // reached, as the difference can then only decrease from there.
+ while(index1 < sample1.length && index2 < sample2.length) {
+ // Next (!) positions
+ final double x1 = sample1[index1], x2 = sample2[index2];
+ // Advance on first curve
+ if(x1 <= x2) {
+ index1++;
+ // Handle multiple points with same x:
+ while(index1 < sample1.length && sample1[index1] == x1) {
+ index1++;
+ }
+ cdf1 = ((double) index1) / sample1.length;
+ }
+ // Advance on second curve
+ if(x1 >= x2) {
+ index2++;
+ // Handle multiple points with same x:
+ while(index2 < sample2.length && sample2[index2] == x2) {
+ index2++;
+ }
+ cdf2 = ((double) index2) / sample2.length;
+ }
+ maximum = Math.max(maximum, Math.abs(cdf1 - cdf2));
+ }
+
+ return maximum;
+ }
+
+ /**
+ * Parameterizer, to use the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected KolmogorovSmirnovTest makeInstance() {
+ return STATIC;
+ }
+ }
+}
\ No newline at end of file
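A short sketch of the two entry points with illustrative data; deviation sorts its inputs, whereas calculateTestStatistic expects both arrays already sorted:

  // Sketch only: illustrative values; calculateTestStatistic requires sorted input.
  double[] a = { 1.0, 2.0, 3.0, 4.0 };
  double[] b = { 3.0, 4.0, 5.0, 6.0 };
  double d = KolmogorovSmirnovTest.calculateTestStatistic(a, b);
  // The largest CDF gap is at x = 2 (0.5 vs. 0.0), so d == 0.5 for these samples.
  double same = KolmogorovSmirnovTest.STATIC.deviation(a, b); // sorts, then delegates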
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/WelchTTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/WelchTTest.java
new file mode 100644
index 00000000..554a22db
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/WelchTTest.java
@@ -0,0 +1,124 @@
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.StudentsTDistribution;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Calculates a test statistic according to Welch's t test for two samples.
+ * Supplies methods for calculating the degrees of freedom according to the
+ * Welch-Satterthwaite equation, and directly calculates a two-sided p-value
+ * for the underlying t-distribution.
+ *
+ * @author Jan Brusis
+ * @author Erich Schubert
+ *
+ * @apiviz.uses StudentsTDistribution
+ */
+public class WelchTTest implements GoodnessOfFitTest {
+ /**
+ * Static instance.
+ */
+ public static final WelchTTest STATIC = new WelchTTest();
+
+ /**
+ * Constructor.
+ */
+ public WelchTTest() {
+ super();
+ }
+
+ @Override
+ public double deviation(double[] sample1, double[] sample2) {
+ MeanVariance mv1 = new MeanVariance(), mv2 = new MeanVariance();
+ for(double d : sample1) {
+ mv1.put(d);
+ }
+ for(double d : sample2) {
+ mv2.put(d);
+ }
+
+ final double t = calculateTestStatistic(mv1, mv2);
+ final int v = calculateDOF(mv1, mv2);
+ return 1 - calculatePValue(t, v);
+ }
+
+ /**
+ * Calculate the statistic of Welch's t test using statistical moments of the
+ * provided data samples.
+ *
+ * @param mv1 Mean and variance of first sample
+ * @param mv2 Mean and variance of second sample
+ * @return Welch's t statistic
+ */
+ public static double calculateTestStatistic(MeanVariance mv1, MeanVariance mv2) {
+ final double delta = mv1.getMean() - mv2.getMean();
+ final double relvar1 = mv1.getSampleVariance() / mv1.getCount();
+ final double relvar2 = mv2.getSampleVariance() / mv2.getCount();
+ return delta / Math.sqrt(relvar1 + relvar2);
+ }
+
+ /**
+ * Calculates the degrees of freedom according to the Welch-Satterthwaite
+ * equation.
+ *
+ * @param mv1 Mean and variance of first sample
+ * @param mv2 Mean and variance of second sample
+ * @return Estimated degrees of freedom.
+ */
+ public static int calculateDOF(MeanVariance mv1, MeanVariance mv2) {
+ final double relvar1 = mv1.getSampleVariance() / mv1.getCount();
+ final double relvar2 = mv2.getSampleVariance() / mv2.getCount();
+ final double wvariance = relvar1 + relvar2;
+ final double div = relvar1 * relvar1 / (mv1.getCount() - 1) + relvar2 * relvar2 / (mv2.getCount() - 1);
+ return (int) (wvariance * wvariance / div);
+ }
+
+ /**
+ * Calculates the two-sided p-value of the underlying t-distribution with v
+ * degrees of freedom.
+ *
+ * @param t t statistic (used as integration limit)
+ * @param v Degrees of freedom
+ * @return p-value
+ */
+ public static double calculatePValue(double t, int v) {
+ return 2 * (1 - StudentsTDistribution.cdf(Math.abs(t), v));
+ }
+
+ /**
+ * Parameterizer, to use the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected WelchTTest makeInstance() {
+ return STATIC;
+ }
+ }
+}
\ No newline at end of file
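A small sketch of the static helpers, feeding them via the same MeanVariance accumulation used in deviation above; the sample values are arbitrary:

  // Sketch only: arbitrary sample values.
  MeanVariance mv1 = new MeanVariance(), mv2 = new MeanVariance();
  for(double d : new double[] { 1.0, 2.0, 3.0, 4.0 }) { mv1.put(d); }
  for(double d : new double[] { 2.5, 3.5, 4.5, 5.5 }) { mv2.put(d); }
  double t = WelchTTest.calculateTestStatistic(mv1, mv2); // (mean1 - mean2) / sqrt(s1^2/n1 + s2^2/n2)
  int v = WelchTTest.calculateDOF(mv1, mv2);              // truncated Welch-Satterthwaite estimate
  double p = WelchTTest.calculatePValue(t, v);            // two-sided p-value via StudentsTDistribution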
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java
new file mode 100644
index 00000000..3e39a519
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>Statistical tests.</p>
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.math.statistics.tests;
\ No newline at end of file