summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/math/statistics
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/statistics')
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java189
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java126
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java313
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java6
17 files changed, 525 insertions, 215 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java
index ed9d8d58..25aa2ea7 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
* Epanechnikov kernel density estimator.
@@ -49,4 +50,18 @@ public final class EpanechnikovKernelDensityFunction implements KernelDensityFun
* Static instance.
*/
public static final EpanechnikovKernelDensityFunction KERNEL = new EpanechnikovKernelDensityFunction();
+
+ /**
+ * Parameterization stub.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected EpanechnikovKernelDensityFunction makeInstance() {
+ return KERNEL;
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java
index 744a9108..2cd15408 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
* Gaussian kernel density estimator.
@@ -51,4 +52,18 @@ public final class GaussianKernelDensityFunction implements KernelDensityFunctio
* Static instance.
*/
public static final GaussianKernelDensityFunction KERNEL = new GaussianKernelDensityFunction();
+
+ /**
+ * Parameterization stub.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected GaussianKernelDensityFunction makeInstance() {
+ return KERNEL;
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
index 3bb0e1f6..d7ffefb8 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
@@ -71,7 +71,7 @@ public class KernelDensityEstimator {
dens = new double[data.length];
var = new double[data.length];
- double halfwidth = ((max - min) / windows) / 2;
+ double halfwidth = ((max - min) / windows) * .5;
// collect data points
for(int current = 0; current < data.length; current++) {
@@ -84,7 +84,7 @@ public class KernelDensityEstimator {
}
double realwidth = (Math.min(data[current] + halfwidth, max) - Math.max(min, data[current] - halfwidth));
double weight = realwidth / (2 * halfwidth);
- dens[current] = value / (data.length * realwidth / 2);
+ dens[current] = value / (data.length * realwidth * .5);
var[current] = 1 / weight;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
index 0e674146..79c84701 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
@@ -138,7 +138,7 @@ public class MultipleLinearRegression {
*/
@Override
public String toString() {
- StringBuffer msg = new StringBuffer();
+ StringBuilder msg = new StringBuilder();
msg.append("x = ").append(FormatUtil.format(x, 9, 4));
msg.append("\ny = ").append(FormatUtil.format(y, 9, 4));
msg.append("\nb = ").append(FormatUtil.format(b, 9, 4));
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java
deleted file mode 100644
index a8f45f9d..00000000
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java
+++ /dev/null
@@ -1,189 +0,0 @@
-package de.lmu.ifi.dbs.elki.math.statistics;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import gnu.trove.map.TDoubleDoubleMap;
-import gnu.trove.map.TIntObjectMap;
-import gnu.trove.map.hash.TDoubleDoubleHashMap;
-import gnu.trove.map.hash.TIntObjectHashMap;
-
-/**
- * Tabelarizes the values for student distribution.
- *
- * @author Elke Achtert
- */
-public class StudentDistribution {
- /**
- * Available alpha values.
- */
- public static double _6000 = 0.6;
-
- /**
- * Available alpha values.
- */
- public static double _8000 = 0.8;
-
- /**
- * Available alpha values.
- */
- public static double _9000 = 0.9;
-
- /**
- * Available alpha values.
- */
- public static double _9500 = 0.95;
-
- /**
- * Available alpha values.
- */
- public static double _9750 = 0.975;
-
- /**
- * Available alpha values.
- */
- public static double _9900 = 0.99;
-
- /**
- * Available alpha values.
- */
- public static double _9950 = 0.995;
-
- /**
- * Available alpha values.
- */
- public static double _9990 = 0.999;
-
- /**
- * Available alpha values.
- */
- public static double _9995 = 0.9995;
-
- /**
- * Available alpha values.
- */
- public static double _4000 = 0.4;
-
- /**
- * Available alpha values.
- */
- public static double _2000 = 0.2;
-
- /**
- * Available alpha values.
- */
- public static double _1000 = 0.1;
-
- /**
- * Available alpha values.
- */
- public static double _0500 = 0.05;
-
- /**
- * Available alpha values.
- */
- public static double _0250 = 0.025;
-
- /**
- * Available alpha values.
- */
- public static double _0100 = 0.01;
-
- /**
- * Available alpha values.
- */
- public static double _0050 = 0.005;
-
- /**
- * Available alpha values.
- */
- public static double _0010 = 0.001;
-
- /**
- * Available alpha values.
- */
- public static double _0005 = 0.005;
-
- /**
- * Holds the t-values.
- */
- private static TIntObjectMap<TDoubleDoubleMap> tValues = new TIntObjectHashMap<TDoubleDoubleMap>();
-
- static {
- put(31, new double[] { 0.2533, 0.8416, 1.2816, 1.6449, 1.96, 2.3263, 2.5758, 3.0903, 3.2906 });
- }
-
- /**
- * Returns the t-value for the given alpha-value and degree of freedom.
- *
- * @param alpha the alpha value
- * @param n the degree of freedom
- * @return the t-value for the given alpha-value and degree of freedom
- */
- public static double tValue(double alpha, int n) {
- if(n > 30) {
- n = 31;
- }
- TDoubleDoubleMap map = tValues.get(n);
- if(map == null) {
- throw new IllegalArgumentException("t-values for n=" + n + " not yet tabularized!");
- }
-
- Double value = map.get(alpha);
- if(value == null) {
- throw new IllegalArgumentException("t-values for alpha=" + alpha + " not tabularized!");
- }
-
- return value;
- }
-
- /**
- * Stores the specified t-values for the given degree of freedom.
- *
- * @param n the degree of freedom
- * @param values the t-values
- */
- private static void put(int n, double[] values) {
- TDoubleDoubleMap map = new TDoubleDoubleHashMap();
- map.put(_6000, values[0]);
- map.put(_8000, values[1]);
- map.put(_9000, values[2]);
- map.put(_9500, values[3]);
- map.put(_9750, values[4]);
- map.put(_9900, values[5]);
- map.put(_9950, values[6]);
- map.put(_9990, values[7]);
- map.put(_9995, values[8]);
-
- map.put(_4000, -values[0]);
- map.put(_2000, -values[1]);
- map.put(_1000, -values[2]);
- map.put(_0500, -values[3]);
- map.put(_0250, -values[4]);
- map.put(_0100, -values[5]);
- map.put(_0050, -values[6]);
- map.put(_0010, -values[7]);
- map.put(_0005, -values[8]);
- tValues.put(n, map);
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java
index e3c23b2a..aee544de 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
* Triangular kernel density estimator.
@@ -49,4 +50,18 @@ public final class TriangularKernelDensityFunction implements KernelDensityFunct
* Static instance.
*/
public static final TriangularKernelDensityFunction KERNEL = new TriangularKernelDensityFunction();
+
+ /**
+ * Parameterization stub.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected TriangularKernelDensityFunction makeInstance() {
+ return KERNEL;
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java
index 8d85528f..66fe7888 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
* Uniform / Rectangular kernel density estimator.
@@ -49,4 +50,18 @@ public final class UniformKernelDensityFunction implements KernelDensityFunction
* Static instance.
*/
public static final UniformKernelDensityFunction KERNEL = new UniformKernelDensityFunction();
+
+ /**
+ * Parameterization stub.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected UniformKernelDensityFunction makeInstance() {
+ return KERNEL;
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
index 84c86e98..5dc5b399 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
@@ -91,7 +91,7 @@ public class ChiDistribution implements DistributionWithRandom {
* @return CDF value
*/
public static double cdf(double val, double dof) {
- return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2);
+ return GammaDistribution.regularizedGammaP(dof * .5, val * val * .5);
}
// FIXME: implement!
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
index 8555afd3..efa24079 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
@@ -65,8 +65,8 @@ public class ChiSquaredDistribution extends GammaDistribution {
if(x == 0) {
return 0.0;
}
- final double k = dof / 2;
- if(k == 1.0) {
+ final double k = dof * .5;
+ if(Math.abs(k - 1.0) < Double.MIN_NORMAL) {
return Math.exp(-x * 2.0) * 2.0;
}
return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0;
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
index 5b6cd286..ad4ef944 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
@@ -37,7 +37,7 @@ public interface Distribution {
* @param val existing value
* @return distribution density
*/
- public double pdf(double val);
+ double pdf(double val);
/**
* Return the cumulative density function at the given value.
@@ -45,7 +45,7 @@ public interface Distribution {
* @param val existing value
* @return cumulative density
*/
- public double cdf(double val);
+ double cdf(double val);
/**
* Quantile aka probit (for normal) aka inverse CDF (invcdf, cdf^-1) function.
@@ -53,7 +53,7 @@ public interface Distribution {
* @param val Quantile to find
* @return Quantile position
*/
- public double quantile(double val);
+ double quantile(double val);
/**
* Describe the distribution
@@ -61,5 +61,5 @@ public interface Distribution {
* @return description
*/
@Override
- public String toString();
-}
+ String toString();
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
index af272528..02f5002f 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java
@@ -33,5 +33,5 @@ public interface DistributionWithRandom extends Distribution {
*
* @return new random value
*/
- public double nextRandom();
-}
+ double nextRandom();
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java
new file mode 100644
index 00000000..866f40d6
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java
@@ -0,0 +1,126 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+/**
+ * Exponential distribution.
+ *
+ * @author Erich Schubert
+ */
+public class ExponentialDistribution implements DistributionWithRandom {
+ /**
+ * Random generator.
+ */
+ Random rnd;
+
+ /**
+ * Rate, inverse of mean
+ */
+ double rate;
+
+ /**
+ * Constructor.
+ *
+ * @param rate Rate parameter (1/scale)
+ */
+ public ExponentialDistribution(double rate) {
+ this(rate, new Random());
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param rate Rate parameter (1/scale)
+ * @param random Random generator
+ */
+ public ExponentialDistribution(double rate, Random random) {
+ super();
+ this.rate = rate;
+ this.rnd = random;
+ }
+
+ @Override
+ public double pdf(double val) {
+ return rate * Math.exp(-rate * val);
+ }
+
+ /**
+ * PDF, static version
+ *
+ * @param val Value to compute PDF at
+ * @param rate Rate parameter (1/scale)
+ * @return probability density
+ */
+ public static double pdf(double val, double rate) {
+ return rate * Math.exp(-rate * val);
+ }
+
+ @Override
+ public double cdf(double val) {
+ return 1 - Math.exp(-rate * val);
+ }
+
+ /**
+ * Cumulative density, static version
+ *
+ * @param val Value to compute CDF at
+ * @param rate Rate parameter (1/scale)
+ * @return cumulative density
+ */
+ public static double cdf(double val, double rate) {
+ return 1 - Math.exp(-rate * val);
+ }
+
+ @Override
+ public double quantile(double val) {
+ return -Math.log(1 - val) / rate;
+ }
+
+ /**
+ * Quantile function, static version
+ *
+ * @param val Value to compute quantile for
+ * @param rate Rate parameter
+ * @return Quantile
+ */
+ public static double quantile(double val, double rate) {
+ return -Math.log(1 - val) / rate;
+ }
+
+ /**
+ * This method currently uses the naive approach of returning
+ * <code>-log(uniform)</code>.
+ *
+ * TODO: there are variants that do not rely on the log method and are faster.
+ * We need to implement and evaluate these. For details: see
+ * "Computer methods for sampling from the exponential and normal distributions"
+ * J. H. Ahrens, U. Dieter, https://dl.acm.org/citation.cfm?id=361593
+ */
+ @Override
+ public double nextRandom() {
+ return -Math.log(rnd.nextDouble()) / rate;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java
new file mode 100644
index 00000000..df8fecda
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java
@@ -0,0 +1,313 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.Primes;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Halton sequences are a pseudo-uniform distribution. The data is actually too
+ * regular for a true uniform distribution, but as such will of course often
+ * appear to be uniform.
+ *
+ * Technically, they are based on Van der Corput sequence and the Von Neumann
+ * Katutani transformation. These produce a series of integers which then are
+ * converted to floating point values.
+ *
+ * To randomize, we just choose a random starting position, as indicated by
+ *
+ * Reference:
+ * <p>
+ * Randomized halton sequences<br>
+ * Wang, X. and Hickernell, F.J.<br />
+ * Mathematical and Computer Modelling Vol. 32 (7)
+ * </p>
+ *
+ * <b>Important note: this code hasn't been double checked yet. While it
+ * probably works for some simple cases such as example data set generation, do
+ * <em>not</em> rely on it for e.g. quasi monte carlo methods without
+ * double-checking the quality, and looking at more advanced methods!</b>
+ *
+ * Let me repeat this: this code was written <b>to generate toy datasets</b>. It
+ * <b>may have deficits</b> for other uses! <b>There is a high chance it will
+ * produce correlated data when used for more than one dimension.</b> - for toy
+ * data sets, try different random seeds until you find one that works for you.
+ *
+ * TODO: find an improved algorithm that takes care of a better randomization,
+ * for example by adding scrambling.
+ *
+ * @author Erich Schubert
+ */
+@Reference(title = "Randomized halton sequences", authors = "Wang, X. and Hickernell, F.J.", booktitle = "Mathematical and Computer Modelling Vol. 32 (7)", url = "http://dx.doi.org/10.1016/S0895-7177(00)00178-3")
+public class HaltonUniformDistribution implements DistributionWithRandom {
+ /**
+ * Minimum
+ */
+ private double min;
+
+ /**
+ * Maximum
+ */
+ private double max;
+
+ /**
+ * Len := max - min
+ */
+ private double len;
+
+ /**
+ * Maximum number of iterations of fast variant
+ */
+ private static final int MAXFAST = 1000;
+
+ /**
+ * Threshold
+ */
+ private static final double ALMOST_ONE = 1.0 - 1e-10;
+
+ /**
+ * Base value
+ */
+ final short base;
+
+ /**
+ * Inverse of base, for faster division by multiplication.
+ */
+ final double invbase;
+
+ /**
+ * Logarithm of base.
+ */
+ final double logbase;
+
+ /**
+ * Maximum integer to use
+ */
+ final int maxi;
+
+ /**
+ * Counter, for max iterations of fast function.
+ */
+ int counter = 0;
+
+ /**
+ * Current value
+ */
+ double current;
+
+ /**
+ * Integer inverse
+ */
+ long inverse;
+
+ /**
+ * Constructor for a halton pseudo uniform distribution on the interval [min,
+ * max[
+ *
+ * @param min Minimum value
+ * @param max Maximum value
+ * @param base Base value
+ * @param seed Random seed (starting value)
+ */
+ public HaltonUniformDistribution(double min, double max, int base, double seed) {
+ super();
+ // Swap parameters if they were given incorrectly.
+ if (min > max) {
+ double tmp = min;
+ min = max;
+ max = tmp;
+ }
+ this.min = min;
+ this.max = max;
+ this.len = max - min;
+
+ this.base = (short) base;
+ this.invbase = 1.0 / base;
+ this.logbase = Math.log(base);
+ // 32 bit * log(2) / log(base)
+ this.maxi = (int) (32.0 * MathUtil.LOG2 / logbase);
+ this.current = seed;
+ this.inverse = inverse(seed);
+ }
+
+ /**
+ * Constructor for a halton pseudo uniform distribution on the interval [min,
+ * max[
+ *
+ * @param min Minimum value
+ * @param max Maximum value
+ */
+ public HaltonUniformDistribution(double min, double max) {
+ // TODO: use different starting primes?
+ this(min, max, new Random());
+ }
+
+ /**
+ * Constructor for a halton pseudo uniform distribution on the interval [min,
+ * max[
+ *
+ * @param min Minimum value
+ * @param max Maximum value
+ * @param rnd Random generator
+ */
+ public HaltonUniformDistribution(double min, double max, Random rnd) {
+ // TODO: use different starting primes?
+ this(min, max, choosePrime(rnd), rnd.nextDouble());
+ }
+
+ /**
+ * Choose a random prime. We try to avoid the later primes, as they are known
+ * to cause too correlated data.
+ *
+ * @param rnd Random generator
+ * @return Prime
+ */
+ private static int choosePrime(Random rnd) {
+ return Primes.FIRST_PRIMES[rnd.nextInt(10)];
+ }
+
+ @Override
+ public double pdf(double val) {
+ if (val < min || val >= max) {
+ return 0.0;
+ }
+ return 1.0 / len;
+ }
+
+ @Override
+ public double cdf(double val) {
+ if (val < min) {
+ return 0.0;
+ }
+ if (val > max) {
+ return 1.0;
+ }
+ return (val - min) / len;
+ }
+
+ @Override
+ public double quantile(double val) {
+ return min + len * val;
+ }
+
+ /**
+ * Compute the inverse with respect to the given base.
+ *
+ * @param current Current value
+ * @return Integer inverse
+ */
+ private long inverse(double current) {
+ // Represent to base b.
+ short[] digits = new short[maxi];
+ int j;
+ for (j = 0; j < maxi; j++) {
+ current *= base;
+ digits[j] = (short) current;
+ current -= digits[j];
+ if (current <= 1e-10) {
+ break;
+ }
+ }
+ long inv = 0;
+ for (j = maxi - 1; j >= 0; j--) {
+ inv = inv * base + digits[j];
+ }
+ return inv;
+ }
+
+ /**
+ * Compute the radical inverse of i.
+ *
+ * @param i Input long value
+ * @return Double radical inverse
+ */
+ private double radicalInverse(long i) {
+ double digit = 1.0 / (double) base;
+ double radical = digit;
+ double inverse = 0.0;
+ while (i > 0) {
+ inverse += digit * (double) (i % base);
+ digit *= radical;
+ i /= base;
+ }
+ return inverse;
+ }
+
+ /**
+ * Compute the next radical inverse.
+ *
+ * @return Next inverse
+ */
+ private double nextRadicalInverse() {
+ counter++;
+ // Do at most MAXFAST appromate steps
+ if (counter >= MAXFAST) {
+ counter = 0;
+ inverse += MAXFAST;
+ current = radicalInverse(inverse);
+ return current;
+ }
+ // Fast approximation:
+ double nextInverse = current + invbase;
+ if (nextInverse < ALMOST_ONE) {
+ current = nextInverse;
+ return current;
+ } else {
+ double digit1 = invbase, digit2 = invbase * invbase;
+ while (current + digit2 >= ALMOST_ONE) {
+ digit1 = digit2;
+ digit2 *= invbase;
+ }
+ current += (digit1 - 1.0) + digit2;
+ return current;
+ }
+ }
+
+ @Override
+ public double nextRandom() {
+ return min + nextRadicalInverse() * len;
+ }
+
+ @Override
+ public String toString() {
+ return "HaltonUniformDistribution(min=" + min + ", max=" + max + ")";
+ }
+
+ /**
+ * @return the minimum value
+ */
+ public double getMin() {
+ return min;
+ }
+
+ /**
+ * @return the maximum value
+ */
+ public double getMax() {
+ return max;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
index 9180b59e..1845dec1 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
@@ -315,7 +315,7 @@ public class NormalDistribution implements DistributionWithRandom {
public static double pdf(double x, double mu, double sigma) {
final double x_mu = x - mu;
final double sigmasq = sigma * sigma;
- return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-1 * x_mu * x_mu / 2 / sigmasq);
+ return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-.5 * x_mu * x_mu / sigmasq);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
index 53fb0dc8..a4ea9402 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
@@ -53,26 +53,26 @@ public class PoissonDistribution implements Distribution {
private double p;
/** Stirling error constants: 1./12 */
- private final static double S0 = 0.08333333333333333333333d;
+ private static final double S0 = 0.08333333333333333333333d;
/** Stirling error constants: 1./360 */
- private final static double S1 = 0.0027777777777777777777777777778d;
+ private static final double S1 = 0.0027777777777777777777777777778d;
/** Stirling error constants: 1./1260 */
- private final static double S2 = 0.00079365079365079365079365d;
+ private static final double S2 = 0.00079365079365079365079365d;
/** Stirling error constants: 1./1680 */
- private final static double S3 = 0.000595238095238095238095238095d;
+ private static final double S3 = 0.000595238095238095238095238095d;
/** Stirling error constants: 1./1188 */
- private final static double S4 = 0.00084175084175084175084175084175d;
+ private static final double S4 = 0.00084175084175084175084175084175d;
/**
* Exact table values for n <= 15 in steps of 0.5
*
* sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) )
*/
- private final static double STIRLING_EXACT_ERROR[] = {//
+ private static final double STIRLING_EXACT_ERROR[] = {//
0.0, // 0.0
0.1534264097200273452913848, // 0.5
0.0810614667953272582196702, // 1.0
@@ -273,7 +273,7 @@ public class PoissonDistribution implements Distribution {
private static double stirlingError(int n) {
// Try to use a table value:
if(n < 16) {
- return STIRLING_EXACT_ERROR[n * 2];
+ return STIRLING_EXACT_ERROR[n << 1];
}
final double nn = n * n;
// Use the appropriate number of terms
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
index 2e9e0d15..fcb96c12 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
@@ -73,7 +73,7 @@ public class StudentsTDistribution implements Distribution {
*/
public static double pdf(double val, int v) {
// TODO: improve precision by computing "exp" last?
- return Math.exp(GammaDistribution.logGamma((v + 1) / 2) - GammaDistribution.logGamma(v / 2)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) / 2));
+ return Math.exp(GammaDistribution.logGamma((v + 1) * .5) - GammaDistribution.logGamma(v * .5)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) * .5));
}
/**
@@ -85,6 +85,6 @@ public class StudentsTDistribution implements Distribution {
*/
public static double cdf(double val, int v) {
double x = v / (val * val + v);
- return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v / 2, 0.5));
+ return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v * .5, 0.5));
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java
index f50f469f..363dbfea 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java
@@ -38,12 +38,12 @@ public interface GoodnessOfFitTest extends Parameterizable {
/**
* Measure the deviation of a full sample from a conditional sample.
*
- * Sample arrays *may* be modified, e.g. sorted, by the test.
+ * Sample arrays <em>may</em> be modified, e.g. sorted, by the test.
*
* @param fullSample Full sample
* @param conditionalSample Conditional sample
*
* @return Deviation
*/
- public double deviation(double[] fullSample, double[] conditionalSample);
-}
+ double deviation(double[] fullSample, double[] conditionalSample);
+} \ No newline at end of file