diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator')
67 files changed, 6186 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractExpMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractExpMADEstimator.java new file mode 100644 index 00000000..6f9dc541 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractExpMADEstimator.java @@ -0,0 +1,96 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the median and MAD. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractExpMADEstimator<D extends Distribution> implements ExpMADDistributionEstimator<D> { + /** + * Constructor. 
+ */ + public AbstractExpMADEstimator() { + super(); + } + + @Override + public abstract D estimateFromExpMedianMAD(double median, double mad); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // TODO: detect pre-sorted data? + final int len = adapter.size(data); + // Modifiable copy: + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + x[i] = Math.exp(adapter.getDouble(data, i)); + } + double median = QuickSelect.median(x); + double mad = computeMAD(x, median); + return estimateFromExpMedianMAD(median, mad); + } + + /** + * Compute the median absolute deviation from median. + * + * @param x Input data <b>will be modified</b> + * @param median Median value. + * @return Median absolute deviation from median. + */ + public static double computeMAD(double[] x, double median) { + // Compute deviations: + for (int i = 0; i < x.length; i++) { + x[i] = Math.abs(x[i] - median); + } + double mad = QuickSelect.median(x); + // Fallback if we have more than 50% ties to next largest. + if (!(mad > 0.)) { + double min = Double.POSITIVE_INFINITY; + for (double xi : x) { + if (xi > 0. && xi < min) { + min = xi; + } + } + if (min < Double.POSITIVE_INFINITY) { + mad = min; + } else { + mad = 1.0; // Maybe all constant. No real value. 
+ } + } + return mad; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLMMEstimator.java new file mode 100644 index 00000000..3d77a1e6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLMMEstimator.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.statistics.ProbabilityWeightedMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for L-Moments based estimators (LMM). + * + * @author Erich Schubert + * + * @param <D> Distribution class. 
+ */ +public abstract class AbstractLMMEstimator<D extends Distribution> implements LMMDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractLMMEstimator() { + super(); + } + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // Sort: + final int size = adapter.size(data); + double[] sorted = new double[size]; + for (int i = 0; i < size; i++) { + sorted[i] = adapter.getDouble(data, i); + } + Arrays.sort(sorted); + double[] xmom = ProbabilityWeightedMoments.samLMR(sorted, ArrayLikeUtil.DOUBLEARRAYADAPTER, getNumMoments()); + return estimateFromLMoments(xmom); + } + + @Override + abstract public D estimateFromLMoments(double[] xmom); + + @Override + abstract public int getNumMoments(); + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMADEstimator.java new file mode 100644 index 00000000..b4e4e095 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMADEstimator.java @@ -0,0 +1,101 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the median and MAD. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractLogMADEstimator<D extends Distribution> implements LogMADDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractLogMADEstimator() { + super(); + } + + @Override + public abstract D estimateFromLogMedianMAD(double median, double mad, double shift); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // TODO: detect pre-sorted data? + final int len = adapter.size(data); + double min = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10); + // Modifiable copy: + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i) - min; + x[i] = val > 0. ? Math.log(val) : Double.NEGATIVE_INFINITY; + if (Double.isNaN(x[i])) { + throw new ArithmeticException("NaN value."); + } + } + double median = QuickSelect.median(x); + double mad = computeMAD(x, median); + return estimateFromLogMedianMAD(median, mad, min); + } + + /** + * Compute the median absolute deviation from median. + * + * @param x Input data <b>will be modified</b> + * @param median Median value. + * @return Median absolute deviation from median. 
+ */ + public static double computeMAD(double[] x, double median) { + // Compute deviations: + for (int i = 0; i < x.length; i++) { + x[i] = Math.abs(x[i] - median); + } + double mad = QuickSelect.median(x); + // Fallback if we have more than 50% ties to next largest. + if (!(mad > 0.)) { + double min = Double.POSITIVE_INFINITY; + for (double xi : x) { + if (xi > 0. && xi < min) { + min = xi; + } + } + if (min < Double.POSITIVE_INFINITY) { + mad = min; + } else { + mad = 1.0; // Maybe all constant. No real value. + } + } + return mad; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMOMEstimator.java new file mode 100644 index 00000000..8ae86d4a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMOMEstimator.java @@ -0,0 +1,94 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the statistical moments. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractLogMOMEstimator<D extends Distribution> implements LogMOMDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractLogMOMEstimator() { + super(); + } + + @Override + public abstract D estimateFromLogStatisticalMoments(StatisticalMoments moments, double shift); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double min = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10); + StatisticalMoments mv = new StatisticalMoments(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i) - min; + if (Double.isInfinite(val) || Double.isNaN(val) || val <= 0.) { + continue; + } + mv.put(Math.log(val)); + } + return estimateFromLogStatisticalMoments(mv, min); + } + + /** + * Utility function to find the shift (lower bound) from the minimum and maximum values. + * + * @param <A> array type + * @param data Data array + * @param adapter Array adapter + * @param minmin Value returned instead whenever the data minimum is larger than this. + * @return {@code minmin}, or the data minimum lowered by {@code margin} times the value range + */ + public static <A> double min(A data, NumberArrayAdapter<?, A> adapter, double minmin, double margin) { + final int len = adapter.size(data); + double min = adapter.getDouble(data, 0), max = min; + for (int i = 1; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (val < min) { + min = val; + } else if (val > max) { + max = val; + } + } + if (min > minmin) { + return minmin; + } + // Add some extra margin, to not have 0s. 
+ return min - (max - min) * margin; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMeanVarianceEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMeanVarianceEstimator.java new file mode 100644 index 00000000..a21186db --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMeanVarianceEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Estimators that work on Mean and Variance only (i.e. the first two moments + * only). + * + * @author Erich Schubert + * + * @param <D> Distribution to estimate. 
+ */ +public abstract class AbstractLogMeanVarianceEstimator<D extends Distribution> extends AbstractLogMOMEstimator<D> { + /** + * Constructor. + */ + public AbstractLogMeanVarianceEstimator() { + super(); + } + + @Override + public D estimateFromLogStatisticalMoments(StatisticalMoments moments, double shift) { + if (!(moments.getCount() > 1.)) { + throw new ArithmeticException("Too small sample size to estimate variance."); + } + return estimateFromLogMeanVariance(moments, shift); + } + + /** + * Estimate the distribution from mean and variance. + * + * @param mv Mean and variance. + * @param shift Shift that was applied to avoid negative values. + * @return Distribution + */ + public abstract D estimateFromLogMeanVariance(MeanVariance mv, double shift); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double min = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10); + MeanVariance mv = new MeanVariance(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i) - min; + if (Double.isInfinite(val) || Double.isNaN(val) || val <= 0.) 
{ + continue; + } + mv.put(Math.log(val)); + } + if (!(mv.getCount() > 1.)) { + throw new ArithmeticException("Too small sample size to estimate variance."); + } + return estimateFromLogMeanVariance(mv, min); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMADEstimator.java new file mode 100644 index 00000000..54009592 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMADEstimator.java @@ -0,0 +1,109 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the median and MAD. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. 
+ */ +public abstract class AbstractMADEstimator<D extends Distribution> implements MADDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractMADEstimator() { + super(); + } + + @Override + public abstract D estimateFromMedianMAD(double median, double mad); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // TODO: detect pre-sorted data? + final int len = adapter.size(data); + // Modifiable copy: + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + x[i] = adapter.getDouble(data, i); + } + double median = QuickSelect.median(x); + double mad = computeMAD(x, median); + return estimateFromMedianMAD(median, mad); + } + + /** + * Compute the median absolute deviation from median. + * + * @param x Input data <b>will be modified</b> + * @param median Median value. + * @return Median absolute deviation from median. + */ + public static double computeMAD(double[] x, double median) { + // Compute deviations: + for (int i = 0; i < x.length; i++) { + x[i] = Math.abs(x[i] - median); + } + double mad = QuickSelect.median(x); + // Fallback if we have more than 50% ties to next largest. + if (!(mad > 0.)) { + double min = Double.POSITIVE_INFINITY; + for (double xi : x) { + if (xi > 0. && xi < min) { + min = xi; + } + } + if (min < Double.POSITIVE_INFINITY) { + mad = min; + } else { + mad = 1.0; // Maybe all constant. No real value. + } + } + if (mad == Double.POSITIVE_INFINITY) { + double max = 0.; + for (double xi : x) { + if (xi < Double.POSITIVE_INFINITY && xi > max) { + max = xi; + } + } + if (max < Double.POSITIVE_INFINITY) { + mad = max; + } else { + mad = 1.0; // No reasonable value. Give up. 
+ } + } + return mad; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMOMEstimator.java new file mode 100644 index 00000000..30bd0802 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMOMEstimator.java @@ -0,0 +1,66 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the statistical moments. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractMOMEstimator<D extends Distribution> implements MOMDistributionEstimator<D> { + /** + * Constructor. 
+ */ + public AbstractMOMEstimator() { + super(); + } + + @Override + public abstract D estimateFromStatisticalMoments(StatisticalMoments moments); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + StatisticalMoments mv = new StatisticalMoments(); + int size = adapter.size(data); + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(data, i); + if (Double.isInfinite(val) || Double.isNaN(val)) { + continue; + } + mv.put(val); + } + return estimateFromStatisticalMoments(mv); + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMeanVarianceEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMeanVarianceEstimator.java new file mode 100644 index 00000000..73a2e6d2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMeanVarianceEstimator.java @@ -0,0 +1,67 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Estimators that work on Mean and Variance only (i.e. the first two moments + * only). + * + * @author Erich Schubert + * + * @param <D> Distribution to estimate. + */ +public abstract class AbstractMeanVarianceEstimator<D extends Distribution> extends AbstractMOMEstimator<D> implements MeanVarianceDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractMeanVarianceEstimator() { + super(); + } + + @Override + public D estimateFromStatisticalMoments(StatisticalMoments moments) { + return estimateFromMeanVariance(moments); + } + + @Override + public abstract D estimateFromMeanVariance(MeanVariance mv); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + MeanVariance mv = new MeanVariance(); + int size = adapter.size(data); + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(data, i); + if (Double.isInfinite(val) || Double.isNaN(val)) { + continue; + } + mv.put(val); + } + return estimateFromMeanVariance(mv); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/CauchyMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/CauchyMADEstimator.java new file mode 100644 index 00000000..e1cfb20c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/CauchyMADEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute 
it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.CauchyDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Cauchy distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has CauchyDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class CauchyMADEstimator extends AbstractMADEstimator<CauchyDistribution> { + /** + * Static instance. + */ + public static final CauchyMADEstimator STATIC = new CauchyMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private CauchyMADEstimator() { + // Do not instantiate + } + + @Override + public CauchyDistribution estimateFromMedianMAD(double median, double mad) { + return new CauchyDistribution(median, mad); + } + + @Override + public Class<? super CauchyDistribution> getDistributionClass() { + return CauchyDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected CauchyMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/DistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/DistributionEstimator.java new file mode 100644 index 00000000..953fcff8 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/DistributionEstimator.java @@ -0,0 +1,52 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Estimate distribution parameters from a sample. 
+ * + * @author Erich Schubert + * + * @param <D> Distribution type + */ +public interface DistributionEstimator<D extends Distribution> { + /** + * General form of the parameter estimation + * + * @param data Data set + * @param adapter Number array adapter + * @return Estimated distribution + */ + <A> D estimate(A data, NumberArrayAdapter<?, A> adapter); + + /** + * Get the class that is produced by the estimator. + * + * @return Distribution class + */ + Class<? super D> getDistributionClass(); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/EMGOlivierNorbergEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/EMGOlivierNorbergEstimator.java new file mode 100644 index 00000000..65c89c83 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/EMGOlivierNorbergEstimator.java @@ -0,0 +1,82 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentiallyModifiedGaussianDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Naive distribution estimation using mean, sample standard deviation and sample skewness. + * + * @author Erich Schubert + * + * @apiviz.has ExponentiallyModifiedGaussianDistribution - - estimates + */ +@Reference(authors = "J. Olivier, M. M. Norberg", title = "Positively skewed data: Revisiting the Box-Cox power transformation", booktitle = "International Journal of Psychological Research Vol. 3 No. 1") +public class EMGOlivierNorbergEstimator extends AbstractMOMEstimator<ExponentiallyModifiedGaussianDistribution> { + /** + * Static estimator instance. + */ + public static EMGOlivierNorbergEstimator STATIC = new EMGOlivierNorbergEstimator(); + + /** + * Private constructor, use static instance! + */ + private EMGOlivierNorbergEstimator() { + // Do not instantiate + } + + @Override + public ExponentiallyModifiedGaussianDistribution estimateFromStatisticalMoments(StatisticalMoments moments) { + // Avoid NaN by disallowing negative skewness. + final double halfsk13 = Math.pow(Math.max(0., moments.getSampleSkewness() * .5), 1. / 3.); + final double st = moments.getSampleStddev(); + final double mu = moments.getMean() - st * halfsk13; + // Note: we added "abs" here, to avoid even more NaNs. + final double si = st * Math.sqrt(Math.abs((1. + halfsk13) * (1. - halfsk13))); + // One more workaround to ensure finite lambda... + final double la = (halfsk13 > 0) ? 1 / (st * halfsk13) : 1; + return new ExponentiallyModifiedGaussianDistribution(mu, si, la); + } + + @Override + public Class<? super ExponentiallyModifiedGaussianDistribution> getDistributionClass() { + return ExponentiallyModifiedGaussianDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected EMGOlivierNorbergEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExpMADDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExpMADDistributionEstimator.java new file mode 100644 index 00000000..70a16f3e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExpMADDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribuition estimators that use the method of moments (MOM) in + * exponentiated data. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated. 
+ */
+public interface ExpMADDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> {
+  /**
+   * General form of the parameter estimation, from the median and MAD of
+   * the exponentiated values.
+   *
+   * @param median Median of exp values.
+   * @param mad Median absolute deviation from median (in expspace).
+   * @return Estimated distribution
+   */
+  D estimateFromExpMedianMAD(double median, double mad);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialLMMEstimator.java
new file mode 100644
index 00000000..66176545
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialLMMEstimator.java
@@ -0,0 +1,92 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate the parameters of an Exponential Distribution, using the methods
+ * of L-Moments (LMM).
+ *
+ * Reference:
+ * <p>
+ * J. R. M. Hosking<br />
+ * Fortran routines for use with the method of L-moments Version 3.03<br />
+ * IBM Research.
+ * </p>
+ *
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has ExponentialDistribution
+ */
+@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report")
+public class ExponentialLMMEstimator extends AbstractLMMEstimator<ExponentialDistribution> {
+  /**
+   * Shared instance; the constructor is private.
+   */
+  public static final ExponentialLMMEstimator STATIC = new ExponentialLMMEstimator();
+
+  /**
+   * Constructor. Private: use static instance.
+   */
+  private ExponentialLMMEstimator() {
+    super();
+  }
+
+  /**
+   * Number of L-moments read by {@link #estimateFromLMoments}.
+   *
+   * @return always 2
+   */
+  @Override
+  public int getNumMoments() {
+    return 2;
+  }
+
+  /**
+   * Estimate from the first two L-moments: the scale is twice the second
+   * moment, the location is the first moment minus the scale.
+   *
+   * @param xmom L-moments; entries 0 and 1 are read
+   * @return Estimated distribution
+   * @throws ArithmeticException if the scale is not strictly positive
+   */
+  @Override
+  public ExponentialDistribution estimateFromLMoments(double[] xmom) {
+    final double spread = 2. * xmom[1];
+    if (spread > 0.) {
+      final double inverseSpread = 1. / spread;
+      final double shift = xmom[0] - spread;
+      return new ExponentialDistribution(inverseSpread, shift);
+    }
+    // Also reached for NaN, which fails the comparison above.
+    throw new ArithmeticException("Data with non-positive scale cannot be exponential distributed.");
+  }
+
+  @Override
+  public Class<? super ExponentialDistribution> getDistributionClass() {
+    return ExponentialDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected ExponentialLMMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMADEstimator.java
new file mode 100644
index 00000000..208fc72b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMADEstimator.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate Exponential distribution parameters using Median and MAD.
+ *
+ * Reference:
+ * <p>
+ * D. J.
Olive<br />
+ * Applied Robust Statistics<br />
+ * Preprint of an upcoming book, University of Minnesota
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has ExponentialDistribution
+ */
+@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm")
+public class ExponentialMADEstimator extends AbstractMADEstimator<ExponentialDistribution> {
+  /**
+   * Shared instance; the constructor is private.
+   */
+  public static final ExponentialMADEstimator STATIC = new ExponentialMADEstimator();
+
+  /**
+   * Private constructor, use static instance!
+   */
+  private ExponentialMADEstimator() {
+    // Do not instantiate
+  }
+
+  /**
+   * Estimate from median and MAD: scale is 2.0781 * MAD, location is
+   * median - 1.440 * MAD.
+   *
+   * @param median Median of the data
+   * @param mad Median absolute deviation from the median
+   * @return Estimated distribution
+   * @throws ArithmeticException if the scale is not strictly positive
+   */
+  @Override
+  public ExponentialDistribution estimateFromMedianMAD(double median, double mad) {
+    final double spread = 2.0781 * mad;
+    if (spread > 0.) {
+      final double shift = median - 1.440 * mad;
+      return new ExponentialDistribution(1. / spread, shift);
+    }
+    // Also reached for NaN, which fails the comparison above.
+    throw new ArithmeticException("Data with non-positive MAD cannot be exponential distributed.");
+  }
+
+  @Override
+  public Class<? super ExponentialDistribution> getDistributionClass() {
+    return ExponentialDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected ExponentialMADEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMOMEstimator.java
new file mode 100644
index 00000000..4c3f93aa
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMOMEstimator.java
@@ -0,0 +1,77 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate Exponential distribution parameters using the mean, which is the
+ * maximum-likelihood estimate (MLE), but not very robust.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has ExponentialDistribution
+ */
+public class ExponentialMOMEstimator extends AbstractMeanVarianceEstimator<ExponentialDistribution> {
+  /**
+   * Shared instance; the constructor is private.
+   */
+  public static final ExponentialMOMEstimator STATIC = new ExponentialMOMEstimator();
+
+  /**
+   * Private constructor, use static instance!
+   */
+  private ExponentialMOMEstimator() {
+    // Do not instantiate
+  }
+
+  /**
+   * Estimate from the mean only: the distribution is built from the
+   * reciprocal of the mean. The variance is not read.
+   *
+   * @param mv Mean and variance statistics
+   * @return Estimated distribution
+   * @throws ArithmeticException if the mean is not strictly positive
+   */
+  @Override
+  public ExponentialDistribution estimateFromMeanVariance(MeanVariance mv) {
+    final double mean = mv.getMean();
+    if (mean > 0.) {
+      return new ExponentialDistribution(1. / mean);
+    }
+    // Also reached for NaN, which fails the comparison above.
+    throw new ArithmeticException("Data with non-positive mean cannot be exponential distributed.");
+  }
+
+  @Override
+  public Class<? super ExponentialDistribution> getDistributionClass() {
+    return ExponentialDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected ExponentialMOMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMedianEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMedianEstimator.java
new file mode 100644
index 00000000..19ce63c7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMedianEstimator.java
@@ -0,0 +1,83 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software
Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate Exponential distribution parameters using the median only (the
+ * MAD supplied by the base class is not used).
+ *
+ * Reference:
+ * <p>
+ * Robust Estimators for Transformed Location Scale Families<br />
+ * D. J. Olive
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has ExponentialDistribution
+ */
+@Reference(title = "Robust Estimators for Transformed Location Scale Families", authors = "D. J. Olive", booktitle = "")
+public class ExponentialMedianEstimator extends AbstractMADEstimator<ExponentialDistribution> {
+  /**
+   * Static instance.
+   */
+  public static final ExponentialMedianEstimator STATIC = new ExponentialMedianEstimator();
+
+  /**
+   * Private constructor, use static instance!
+   */
+  private ExponentialMedianEstimator() {
+    // Do not instantiate
+  }
+
+  /**
+   * Estimate from the median: the scale is 1.441 * median, and the
+   * distribution is built from its reciprocal.
+   *
+   * @param median Median of the data
+   * @param mad Median absolute deviation from the median (ignored)
+   * @return Estimated distribution
+   * @throws ArithmeticException if the median is not strictly positive
+   */
+  @Override
+  public ExponentialDistribution estimateFromMedianMAD(double median, double mad) {
+    // NOTE(review): 1 / ln(2) is approximately 1.4427; the constant is kept
+    // as published here, confirm against the reference.
+    final double scale = 1.441 * median;
+    if (!(scale > 0.)) {
+      // The scale is derived from the median, so report the median.
+      throw new ArithmeticException("Data with non-positive median cannot be exponential distributed.");
+    }
+    return new ExponentialDistribution(1. / scale);
+  }
+
+  @Override
+  public Class<? super ExponentialDistribution> getDistributionClass() {
+    return ExponentialDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected ExponentialMedianEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java
new file mode 100644
index 00000000..d41881f0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java
@@ -0,0 +1,118 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate distribution parameters using the method by Choi and Wette.
+ *
+ * Reference:
+ * <p>
+ * Maximum likelihood estimation of the parameters of the gamma distribution and
+ * their bias<br />
+ * S. C. Choi, R. Wette<br />
+ * in: Technometrics
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GammaDistribution - - estimates
+ */
+@Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892")
+public class GammaChoiWetteEstimator implements DistributionEstimator<GammaDistribution> {
+  /**
+   * Static estimation, using iterative refinement.
+   */
+  public static final GammaChoiWetteEstimator STATIC = new GammaChoiWetteEstimator();
+
+  /**
+   * Private constructor.
+   */
+  private GammaChoiWetteEstimator() {
+    // Do not instantiate - use static class
+  }
+
+  /**
+   * Estimate the Gamma parameters from the mean and the mean logarithm of the
+   * strictly positive, finite values; all other values are skipped.
+   *
+   * @param data Data set
+   * @param adapter Number array adapter
+   * @param <A> Type of the data container accepted by the adapter
+   * @return Estimated distribution
+   * @throws ArithmeticException if the estimated parameters are not strictly
+   *         positive (this includes NaN, e.g. when no value was usable)
+   */
+  @Override
+  public <A> GammaDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
+    final int len = adapter.size(data);
+    double meanx = 0, meanlogx = 0;
+    // Number of values actually used. The running means must be divided by
+    // this count, not by the array position, or skipped values bias them.
+    int used = 0;
+    for (int i = 0; i < len; i++) {
+      final double val = adapter.getDouble(data, i);
+      if (val <= 0 || Double.isInfinite(val) || Double.isNaN(val)) {
+        continue;
+      }
+      used++;
+      // Incremental mean updates; val > 0 is guaranteed by the test above.
+      meanx += (val - meanx) / used;
+      meanlogx += (Math.log(val) - meanlogx) / used;
+    }
+    // Initial approximation
+    final double logmeanx = Math.log(meanx);
+    final double diff = logmeanx - meanlogx;
+    double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff);
+
+    // Refine via newton iteration, based on Choi and Wette equation
+    // NOTE(review): the textbook Newton step subtracts f(k) / f'(k); here it
+    // is added. Confirm against the sign conventions of digamma/trigamma.
+    while (true) {
+      double kdelta = (Math.log(k) - GammaDistribution.digamma(k) - diff) / (1 / k - GammaDistribution.trigamma(k));
+      // Stop on convergence, or when the step is NaN or positive infinity.
+      if (Math.abs(kdelta) / k < 1E-8 || !(kdelta < Double.POSITIVE_INFINITY)) {
+        break;
+      }
+      k += kdelta;
+    }
+    // Estimate theta:
+    final double theta = k / meanx;
+    if (!(k > 0.0) || !(theta > 0.0)) {
+      throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta);
+    }
+    return new GammaDistribution(k, theta);
+  }
+
+  @Override
+  public Class<? super GammaDistribution> getDistributionClass() {
+    return GammaDistribution.class;
+  }
+
+  @Override
+  public String toString() {
+    return this.getClass().getSimpleName();
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GammaChoiWetteEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaLMMEstimator.java
new file mode 100644
index 00000000..edfc3f51
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaLMMEstimator.java
@@ -0,0 +1,113 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate the parameters of a Gamma Distribution, using the methods of
+ * L-Moments (LMM).
+ *
+ * Reference:
+ * <p>
+ * J. R. M.
Hosking<br />
+ * Fortran routines for use with the method of L-moments Version 3.03<br />
+ * IBM Research.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GammaDistribution
+ */
+@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report")
+public class GammaLMMEstimator extends AbstractLMMEstimator<GammaDistribution> {
+  /**
+   * Static instance.
+   */
+  public static final GammaLMMEstimator STATIC = new GammaLMMEstimator();
+
+  /** Coefficients of the rational approximation used for cv < 0.5 */
+  private static final double //
+  A1 = -0.3080, //
+  A2 = -0.05812, //
+  A3 = 0.01765;
+
+  /** Coefficients of the rational approximation used for cv >= 0.5 */
+  private static final double //
+  B1 = 0.7213, //
+  B2 = -0.5947, //
+  B3 = -2.1817, //
+  B4 = 1.2113;
+
+  /**
+   * Constructor. Private: use static instance.
+   */
+  private GammaLMMEstimator() {
+    super();
+  }
+
+  @Override
+  public int getNumMoments() {
+    return 2;
+  }
+
+  /**
+   * Estimate from the first two L-moments. The ratio xmom[1] / xmom[0]
+   * selects one of two rational approximations for alpha; theta is alpha
+   * divided by xmom[0].
+   *
+   * @param xmom L-moments; entries 0 and 1 are read
+   * @return Estimated distribution
+   * @throws ArithmeticException if alpha or theta is not strictly positive
+   */
+  @Override
+  public GammaDistribution estimateFromLMoments(double[] xmom) {
+    double cv = xmom[1] / xmom[0];
+    double alpha;
+    if (cv < .5) {
+      double t = Math.PI * cv * cv;
+      alpha = (1. + A1 * t) / (t * (1. + t * (A2 + t * A3)));
+    } else {
+      // Also taken when cv is NaN; the NaN result is rejected below.
+      double t = 1. - cv;
+      alpha = t * (B1 + t * B2) / (1. + t * (B3 + t * B4));
+    }
+    final double theta = alpha / xmom[0];
+    if (!(alpha > 0.0) || !(theta > 0.0)) {
+      throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + alpha + " theta=" + theta);
+    }
+    return new GammaDistribution(alpha, theta);
+  }
+
+  @Override
+  public Class<? super GammaDistribution> getDistributionClass() {
+    return GammaDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GammaLMMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMADEstimator.java
new file mode 100644
index 00000000..54b0d38b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMADEstimator.java
@@ -0,0 +1,96 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Robust parameter estimation for the Gamma distribution.
+ *
+ * Based on the Median and Median absolute deviation from Median (MAD).
+ *
+ * Reference:
+ * <p>
+ * J. Chen and H.
Rubin<br />
+ * Bounds for the difference between median and mean of Gamma and Poisson
+ * distributions<br />
+ * In: Statist. Probab. Lett., 4 , 281–283.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GammaDistribution - - estimates
+ */
+@Reference(authors = "J. Chen. H. Rubin", title = "Bounds for the difference between median and mean of Gamma and Poisson distributions", booktitle = "Statist. Probab. Lett., 4")
+public class GammaMADEstimator extends AbstractMADEstimator<GammaDistribution> {
+  /**
+   * Shared instance; the constructor is private.
+   */
+  public static final GammaMADEstimator STATIC = new GammaMADEstimator();
+
+  /**
+   * Private constructor.
+   */
+  private GammaMADEstimator() {
+    // Do not instantiate - use static class
+  }
+
+  /**
+   * Estimate from median and MAD: theta is median / MAD^2 and k is
+   * median * theta.
+   *
+   * @param median Median of the data
+   * @param mad Median absolute deviation from the median
+   * @return Estimated distribution
+   * @throws ArithmeticException if median or MAD is below
+   *         Double.MIN_NORMAL, or a parameter is not strictly positive
+   */
+  @Override
+  public GammaDistribution estimateFromMedianMAD(double median, double mad) {
+    if (median < Double.MIN_NORMAL) {
+      throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero median.");
+    }
+    if (mad < Double.MIN_NORMAL) {
+      throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero MAD.");
+    }
+
+    final double thetaEst = median / (mad * mad);
+    final double kEst = median * thetaEst;
+    if (kEst > 0. && thetaEst > 0.) {
+      return new GammaDistribution(kEst, thetaEst);
+    }
+    // Also reached for NaN, which fails the comparisons above.
+    throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + kEst + " theta=" + thetaEst);
+  }
+
+  @Override
+  public Class<? super GammaDistribution> getDistributionClass() {
+    return GammaDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GammaMADEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMOMEstimator.java
new file mode 100644
index 00000000..0ff0cf47
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMOMEstimator.java
@@ -0,0 +1,93 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Simple parameter estimation for the Gamma distribution.
+ *
+ * This is a very naive estimation, based on the mean and variance only,
+ * sometimes referred to as the "Method of Moments" (MOM).
+ *
+ * Reference:
+ * <p>
+ * G. Casella, R. L. Berger<br />
+ * Statistical inference. Vol. 70
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GammaDistribution - - estimates
+ */
+@Reference(authors = "G. Casella, R. L. Berger", title = "Statistical inference. Vol. 70", booktitle = "Statistical inference. Vol. 70")
+public class GammaMOMEstimator extends AbstractMeanVarianceEstimator<GammaDistribution> {
+  /**
+   * Shared instance; estimates from just the mean and variance.
+   */
+  public static final GammaMOMEstimator STATIC = new GammaMOMEstimator();
+
+  /**
+   * Private constructor.
+   */
+  private GammaMOMEstimator() {
+    // Do not instantiate - use static class
+  }
+
+  /**
+   * Estimate from mean and sample variance: theta is mean / variance and k is
+   * mean * theta.
+   *
+   * @param mv Mean and variance statistics
+   * @return Estimated distribution
+   * @throws ArithmeticException if mean or variance is below
+   *         Double.MIN_NORMAL, or a parameter is not strictly positive
+   */
+  @Override
+  public GammaDistribution estimateFromMeanVariance(MeanVariance mv) {
+    final double mean = mv.getMean();
+    final double variance = mv.getSampleVariance();
+    final boolean degenerate = mean < Double.MIN_NORMAL || variance < Double.MIN_NORMAL;
+    if (degenerate) {
+      throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero mean or variance: " + mv.toString());
+    }
+    final double thetaEst = mean / variance;
+    final double kEst = mean * thetaEst;
+    if (kEst > 0. && thetaEst > 0.) {
+      return new GammaDistribution(kEst, thetaEst);
+    }
+    // Also reached for NaN, which fails the comparisons above.
+    throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + kEst + " theta=" + thetaEst);
+  }
+
+  @Override
+  public Class<? super GammaDistribution> getDistributionClass() {
+    return GammaDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GammaMOMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedExtremeValueLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedExtremeValueLMMEstimator.java
new file mode 100644
index 00000000..cdadf47d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedExtremeValueLMMEstimator.java
@@ -0,0 +1,161 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GeneralizedExtremeValueDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate the parameters of a Generalized Extreme Value Distribution, using
+ * the methods of L-Moments (LMM).
+ *
+ * Reference:
+ * <p>
+ * J. R. M. Hosking, J. R. Wallis, and E. F. Wood<br />
+ * Estimation of the generalized extreme-value distribution by the method of
+ * probability-weighted moments.<br />
+ * Technometrics 27.3
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GeneralizedExtremeValueDistribution
+ */
+@Reference(authors = "J.R.M. Hosking, J. R. Wallis, and E. F. Wood", title = "Estimation of the generalized extreme-value distribution by the method of probability-weighted moments.", booktitle = "Technometrics 27.3", url = "http://dx.doi.org/10.1080/00401706.1985.10488049")
+public class GeneralizedExtremeValueLMMEstimator extends AbstractLMMEstimator<GeneralizedExtremeValueDistribution> {
+  /**
+   * Static instance.
+   */
+  public static final GeneralizedExtremeValueLMMEstimator STATIC = new GeneralizedExtremeValueLMMEstimator();
+
+  /**
+   * Constants for fast rational approximations.
+   */
+  private static final double //
+  A0 = 0.28377530, //
+  A1 = -1.21096399, //
+  A2 = -2.50728214, //
+  A3 = -1.13455566, //
+  A4 = -0.07138022;
+
+  private static final double //
+  B1 = 2.06189696, //
+  B2 = 1.31912239, //
+  B3 = 0.25077104;
+
+  private static final double //
+  C1 = 1.59921491, //
+  C2 = -0.48832213, //
+  C3 = 0.01573152, //
+  D1 = -0.64363929, //
+  D2 = 0.08985247;
+
+  /** Maximum number of iterations. */
+  static int MAXIT = 20;
+
+  /**
+   * Relative convergence threshold of the Newton-Raphson iteration. A value
+   * below double precision (such as 1e-20) could only be met by an exactly
+   * zero step.
+   */
+  private static final double EPS = 1e-6;
+
+  /**
+   * Euler-Mascheroni constant, needed for the Gumbel limit (shape zero).
+   */
+  private static final double EULER_MASCHERONI = 0.5772156649015329;
+
+  /**
+   * Constructor. Private: use static instance.
+   */
+  private GeneralizedExtremeValueLMMEstimator() {
+    super();
+  }
+
+  @Override
+  public int getNumMoments() {
+    return 3;
+  }
+
+  /**
+   * Estimate the distribution from three L-moment values.
+   *
+   * @param xmom Moments; {@code xmom[0]} and {@code xmom[1]} enter as location
+   *        and scale information, {@code xmom[2]} is used as the L-skewness
+   *        {@code t3}.
+   * @return Estimated distribution
+   * @throws ArithmeticException if {@code t3} is not strictly inside (-1, 1),
+   *         or if the Newton-Raphson refinement does not converge.
+   */
+  @Override
+  public GeneralizedExtremeValueDistribution estimateFromLMoments(double[] xmom) {
+    final double t3 = xmom[2];
+    // The L-skewness must lie strictly inside (-1, 1). Zero is a perfectly
+    // valid value; the negated comparison also rejects NaN.
+    if (!(Math.abs(t3) < 1.)) {
+      throw new ArithmeticException("Invalid moment estimation.");
+    }
+    double g;
+    if (t3 > 0.) {
+      // Rational approximation for t3 between 0 and 1:
+      final double z = 1. - t3;
+      g = (-1. + z * (C1 + z * (C2 + z * C3))) / (1. + z * (D1 + z * D2));
+      // g: Almost zero? Then use the Gumbel limit of the formulas below, where
+      // (1 - Gamma(1 + g)) / g tends to the Euler-Mascheroni constant.
+      if (Math.abs(g) < 1e-50) {
+        final double sigma = xmom[1] / MathUtil.LOG2;
+        final double mu = xmom[0] - EULER_MASCHERONI * sigma;
+        return new GeneralizedExtremeValueDistribution(mu, sigma, 0.);
+      }
+    } else {
+      // Rational approximation for t3 between -.8 and 0:
+      g = (A0 + t3 * (A1 + t3 * (A2 + t3 * (A3 + t3 * A4)))) / (1. + t3 * (B1 + t3 * (B2 + t3 * B3)));
+      if (t3 < -.8) {
+        // Newton-Raphson iteration for t3 < -.8
+        if (t3 <= -.97) {
+          // Different starting value close to the lower bound.
+          g = 1. - Math.log(1. + t3) / MathUtil.LOG2;
+        }
+        final double t0 = .5 * (t3 + 3.);
+        for (int it = 1;; it++) {
+          final double x2 = Math.pow(2., -g), xx2 = 1. - x2;
+          final double x3 = Math.pow(3., -g), xx3 = 1. - x3;
+          final double t = xx3 / xx2;
+          // Derivative of xx3 / xx2 with respect to g (quotient rule), hence
+          // the squared denominator.
+          final double deriv = (xx2 * x3 * MathUtil.LOG3 - xx3 * x2 * MathUtil.LOG2) / (xx2 * xx2);
+          final double oldg = g;
+          g -= (t - t0) / deriv;
+          if (Math.abs(g - oldg) <= EPS * g) {
+            break;
+          }
+          if (it >= MAXIT) {
+            throw new ArithmeticException("Newton-Raphson did not converge.");
+          }
+        }
+      }
+    }
+    final double gam = Math.exp(GammaDistribution.logGamma(1. + g));
+    final double k = g;
+    final double sigma = xmom[1] * g / (gam * (1. - Math.pow(2., -g)));
+    final double mu = xmom[0] - sigma * (1. - gam) / g;
+    return new GeneralizedExtremeValueDistribution(mu, sigma, k);
+  }
+
+  @Override
+  public Class<? super GeneralizedExtremeValueDistribution> getDistributionClass() {
+    return GeneralizedExtremeValueDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GeneralizedExtremeValueLMMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedLogisticAlternateLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedLogisticAlternateLMMEstimator.java
new file mode 100644
index 00000000..dfcbcd52
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedLogisticAlternateLMMEstimator.java
@@ -0,0 +1,98 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GeneralizedLogisticAlternateDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate the parameters of a Generalized Logistic Distribution, using the
+ * methods of L-Moments (LMM).
+ *
+ * Reference:
+ * <p>
+ * J. R. M. Hosking<br />
+ * Fortran routines for use with the method of L-moments Version 3.03<br />
+ * IBM Research.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GeneralizedLogisticAlternateDistribution
+ */
+@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report")
+public class GeneralizedLogisticAlternateLMMEstimator extends AbstractLMMEstimator<GeneralizedLogisticAlternateDistribution> {
+  /**
+   * Static instance.
+   */
+  public static final GeneralizedLogisticAlternateLMMEstimator STATIC = new GeneralizedLogisticAlternateLMMEstimator();
+
+  /**
+   * Constructor. Private: use static instance.
+   */
+  private GeneralizedLogisticAlternateLMMEstimator() {
+    super();
+  }
+
+  @Override
+  public int getNumMoments() {
+    return 3;
+  }
+
+  /**
+   * Estimate the distribution from three L-moment values.
+   *
+   * @param xmom Moments; the shape is taken as {@code -xmom[2]}, location and
+   *        scale are derived from {@code xmom[0]} and {@code xmom[1]}.
+   * @return Estimated distribution
+   * @throws ArithmeticException if the shape is not strictly inside (-1, 1).
+   */
+  @Override
+  public GeneralizedLogisticAlternateDistribution estimateFromLMoments(double[] xmom) {
+    final double shape = -xmom[2];
+    // The shape must lie strictly inside (-1, 1): at +-1 the sine in the
+    // correction factor below is only rounding noise. The negated comparison
+    // also rejects NaN.
+    if (!(Math.abs(shape) < 1.)) {
+      throw new ArithmeticException("Invalid moment estimation.");
+    }
+    if (Math.abs(shape) < 1e-6) {
+      // Effectively zero, so non-generalized.
+      return new GeneralizedLogisticAlternateDistribution(xmom[0], xmom[1], 0.);
+    }
+    // Correction factor shape * pi / sin(shape * pi).
+    final double tmp = shape * Math.PI / Math.sin(shape * Math.PI);
+    final double scale = xmom[1] / tmp;
+    final double location = xmom[0] - scale * (1. - tmp) / shape;
+    return new GeneralizedLogisticAlternateDistribution(location, scale, shape);
+  }
+
+  @Override
+  public Class<? super GeneralizedLogisticAlternateDistribution> getDistributionClass() {
+    return GeneralizedLogisticAlternateDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GeneralizedLogisticAlternateLMMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelLMMEstimator.java
new file mode 100644
index 00000000..c0f64006
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelLMMEstimator.java
@@ -0,0 +1,90 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GumbelDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate the parameters of a Gumbel Distribution, using the methods of
+ * L-Moments (LMM).
+ *
+ * Reference:
+ * <p>
+ * J. R. M. 
Hosking<br />
+ * Fortran routines for use with the method of L-moments Version 3.03<br />
+ * IBM Research.
+ * </p>
+ *
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GumbelDistribution
+ */
+@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report")
+public class GumbelLMMEstimator extends AbstractLMMEstimator<GumbelDistribution> {
+  /**
+   * Static instance.
+   */
+  public static final GumbelLMMEstimator STATIC = new GumbelLMMEstimator();
+
+  /**
+   * Euler-Mascheroni constant: the offset between the mean and the location
+   * of a Gumbel distribution, in units of the scale. This is not Euler's
+   * number {@code Math.E}.
+   */
+  private static final double EULER_MASCHERONI = 0.5772156649015329;
+
+  /**
+   * Constructor. Private: use static instance.
+   */
+  private GumbelLMMEstimator() {
+    super();
+  }
+
+  @Override
+  public int getNumMoments() {
+    return 2;
+  }
+
+  /**
+   * Estimate the distribution from two L-moment values.
+   *
+   * @param xmom Moments; {@code xmom[1] / log(2)} is the scale, and the
+   *        location is {@code xmom[0]} minus the Euler-Mascheroni constant
+   *        times the scale.
+   * @return Estimated distribution
+   */
+  @Override
+  public GumbelDistribution estimateFromLMoments(double[] xmom) {
+    final double scale = xmom[1] / MathUtil.LOG2;
+    // NOTE(review): the minus sign assumes that GumbelDistribution has mean
+    // location + gamma * scale (largest-extreme-value form) - confirm.
+    final double location = xmom[0] - EULER_MASCHERONI * scale;
+    return new GumbelDistribution(location, scale);
+  }
+
+  @Override
+  public Class<? super GumbelDistribution> getDistributionClass() {
+    return GumbelDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GumbelLMMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelMADEstimator.java
new file mode 100644
index 00000000..ebf6354a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelMADEstimator.java
@@ -0,0 +1,82 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GumbelDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Parameter estimation via median and median absolute deviation from median
+ * (MAD).
+ *
+ * Reference:
+ * <p>
+ * D. J. 
Olive<br />
+ * Applied Robust Statistics<br />
+ * Preprint of an upcoming book, University of Minnesota
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has GumbelDistribution - - estimates
+ */
+@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm")
+public class GumbelMADEstimator extends AbstractMADEstimator<GumbelDistribution> {
+  /**
+   * Shared instance; the estimator has no state.
+   */
+  public static final GumbelMADEstimator STATIC = new GumbelMADEstimator();
+
+  /**
+   * Private constructor, use the static instance instead.
+   */
+  private GumbelMADEstimator() {
+    // Do not instantiate
+  }
+
+  /**
+   * Build a Gumbel distribution from the median and the MAD.
+   *
+   * @param median Median of the data
+   * @param mad Median absolute deviation from the median
+   * @return Distribution with first parameter {@code median + 0.4778 * mad}
+   *         and second parameter {@code 1.3037 * mad}
+   */
+  @Override
+  public GumbelDistribution estimateFromMedianMAD(double median, double mad) {
+    // TODO: Work around degenerate cases?
+    // NOTE(review): for the largest-extreme-value form the median lies above
+    // the location, which would give median - 0.4778 * mad; the plus sign
+    // fits the smallest-extreme-value form. Confirm against GumbelDistribution.
+    final double location = median + 0.4778 * mad;
+    final double scale = 1.3037 * mad;
+    return new GumbelDistribution(location, scale);
+  }
+
+  @Override
+  public Class<? super GumbelDistribution> getDistributionClass() {
+    return GumbelDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected GumbelMADEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LMMDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LMMDistributionEstimator.java
new file mode 100644
index 00000000..f3d8d1b2
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LMMDistributionEstimator.java
@@ -0,0 +1,51 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
+
+/**
+ * Interface for distribution estimators based on the methods of L-Moments
+ * (LMM).
+ *
+ * @author Erich Schubert
+ *
+ * @param <D> Distribution class.
+ */
+public interface LMMDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> {
+  /**
+   * Estimate from the L-Moments.
+   *
+   * @param moments L-Moments
+   * @return Distribution
+   */
+  D estimateFromLMoments(double[] moments);
+
+  /**
+   * The number of moments needed.
+   *
+   * @return Moments needed.
+   */
+  int getNumMoments();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java
new file mode 100644
index 00000000..1e31af28
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java
@@ -0,0 +1,78 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.LaplaceDistribution;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate Laplace distribution parameters using the method of L-Moments (LMM).
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has LaplaceDistribution
+ */
+public class LaplaceLMMEstimator extends AbstractLMMEstimator<LaplaceDistribution> {
+  /**
+   * Shared instance; the estimator has no state.
+   */
+  public static final LaplaceLMMEstimator STATIC = new LaplaceLMMEstimator();
+
+  /**
+   * Private constructor, use the static instance instead.
+   */
+  private LaplaceLMMEstimator() {
+    // Do not instantiate
+  }
+
+  /**
+   * Build a Laplace distribution from two L-moment values.
+   *
+   * @param xmom Moments; {@code xmom[0]} is used as location, and
+   *        {@code 4/3 * xmom[1]} as scale.
+   * @return Distribution constructed from the inverse scale and the location
+   */
+  @Override
+  public LaplaceDistribution estimateFromLMoments(double[] xmom) {
+    final double scale = 4. / 3. * xmom[1];
+    // The distribution constructor is handed the inverse of the scale.
+    final double rate = 1. / scale;
+    return new LaplaceDistribution(rate, xmom[0]);
+  }
+
+  @Override
+  public int getNumMoments() {
+    return 2;
+  }
+
+  @Override
+  public Class<? super LaplaceDistribution> getDistributionClass() {
+    return LaplaceDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected LaplaceLMMEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java
new file mode 100644
index 00000000..d4671362
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java
@@ -0,0 +1,82 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.LaplaceDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate Laplace distribution parameters using Median and MAD.
+ *
+ * Reference:
+ * <p>
+ * D. J. Olive<br />
+ * Applied Robust Statistics<br />
+ * Preprint of an upcoming book, University of Minnesota
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has LaplaceDistribution
+ */
+@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm")
+public class LaplaceMADEstimator extends AbstractMADEstimator<LaplaceDistribution> {
+  /**
+   * Shared instance; the estimator has no state.
+   */
+  public static final LaplaceMADEstimator STATIC = new LaplaceMADEstimator();
+
+  /**
+   * Private constructor, use the static instance instead.
+   */
+  private LaplaceMADEstimator() {
+    // Do not instantiate
+  }
+
+  /**
+   * Build a Laplace distribution from the median and the MAD.
+   *
+   * @param median Median of the data, used as location
+   * @param mad Median absolute deviation; {@code 1.443 * mad} is the scale
+   * @return Distribution constructed from the inverse scale and the location
+   */
+  @Override
+  public LaplaceDistribution estimateFromMedianMAD(double median, double mad) {
+    final double scale = 1.443 * mad;
+    // The distribution constructor is handed the inverse of the scale.
+    final double rate = 1. / scale;
+    return new LaplaceDistribution(rate, median);
+  }
+
+  @Override
+  public Class<? super LaplaceDistribution> getDistributionClass() {
+    return LaplaceDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected LaplaceMADEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java
new file mode 100644
index 00000000..f44e2b3a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java
@@ -0,0 +1,94 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.LaplaceDistribution;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate Laplace distribution parameters using Median and mean deviation from
+ * median.
+ *
+ * Reference:
+ * <p>
+ * R. M. Norton<br />
+ * The Double Exponential Distribution: Using Calculus to Find a Maximum
+ * Likelihood Estimator<br />
+ * The American Statistician 38 (2)
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has LaplaceDistribution
+ */
+@Reference(title = "The Double Exponential Distribution: Using Calculus to Find a Maximum Likelihood Estimator", authors = "R. M. Norton", booktitle = "The American Statistician 38 (2)", url = "http://dx.doi.org/10.2307%2F2683252")
+public class LaplaceMLEEstimator implements DistributionEstimator<LaplaceDistribution> {
+  /**
+   * Shared instance; the estimator has no state.
+   */
+  public static final LaplaceMLEEstimator STATIC = new LaplaceMLEEstimator();
+
+  /**
+   * Private constructor, use the static instance instead.
+   */
+  private LaplaceMLEEstimator() {
+    // Do not instantiate
+  }
+
+  /**
+   * Estimate the location as the median of the data, and the first
+   * distribution parameter as the number of values divided by the summed
+   * absolute deviations from that median.
+   *
+   * @param data Data container
+   * @param adapter Adapter to read numbers from the container
+   * @return Estimated distribution
+   */
+  @Override
+  public <A> LaplaceDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
+    final int size = adapter.size(data);
+    // Work on a private copy, which is handed to QuickSelect.
+    final double[] values = new double[size];
+    for (int pos = 0; pos < size; pos++) {
+      values[pos] = adapter.getDouble(data, pos);
+    }
+    final double location = QuickSelect.median(values);
+    // Sum (not mean) of the absolute deviations; the division by the count
+    // is folded into the rate below.
+    double sumAbsDev = 0.;
+    for (final double value : values) {
+      sumAbsDev += Math.abs(value - location);
+    }
+    return new LaplaceDistribution(size / sumAbsDev, location);
+  }
+
+  @Override
+  public Class<? super LaplaceDistribution> getDistributionClass() {
+    return LaplaceDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected LaplaceMLEEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaAlternateExpMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaAlternateExpMADEstimator.java
new file mode 100644
index 00000000..7ea0a6be
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaAlternateExpMADEstimator.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaAlternateDistribution;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Robust parameter estimation for the LogGamma distribution.
+ *
+ * A modified algorithm for LogGamma distributions.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has LogGammaAlternateDistribution - - estimates
+ */
+public class LogGammaAlternateExpMADEstimator extends AbstractExpMADEstimator<LogGammaAlternateDistribution> {
+  /**
+   * Static estimator, more robust to outliers by using the median.
+   */
+  public static final LogGammaAlternateExpMADEstimator STATIC = new LogGammaAlternateExpMADEstimator();
+
+  /**
+   * Private constructor, use the static instance instead.
+   */
+  private LogGammaAlternateExpMADEstimator() {
+    // Do not instantiate - use static class
+  }
+
+  /**
+   * Derive the parameters from median and MAD: {@code b = median / mad^2} and
+   * {@code k = median * b}; the distribution receives {@code k},
+   * {@code log(b)} and a third parameter of zero.
+   *
+   * @param median Median value
+   * @param mad Median absolute deviation
+   * @return Estimated distribution
+   * @throws ArithmeticException if median or MAD are below the smallest
+   *         normal double, or if the resulting parameters are not positive.
+   */
+  @Override
+  public LogGammaAlternateDistribution estimateFromExpMedianMAD(double median, double mad) {
+    // Guard clauses: both inputs must be at least Double.MIN_NORMAL.
+    if (median < Double.MIN_NORMAL) {
+      throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero median.");
+    }
+    if (mad < Double.MIN_NORMAL) {
+      throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero MAD.");
+    }
+
+    final double rate = median / (mad * mad);
+    final double shape = median * rate;
+    // Negated comparisons so that NaN values are rejected, too.
+    final boolean valid = shape > 0. && rate > 0.;
+    if (!valid) {
+      throw new ArithmeticException("LogGammaAlternate estimation produced non-positive parameter values: k=" + shape + " b=" + rate + " median=" + median + " mad=" + mad);
+    }
+    return new LogGammaAlternateDistribution(shape, Math.log(rate), 0.);
+  }
+
+  @Override
+  public Class<? super LogGammaAlternateDistribution> getDistributionClass() {
+    return LogGammaAlternateDistribution.class;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected LogGammaAlternateExpMADEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaChoiWetteEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaChoiWetteEstimator.java
new file mode 100644
index 00000000..ba3a899d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaChoiWetteEstimator.java
@@ -0,0 +1,126 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaDistribution;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Estimate distribution parameters using the method by Choi and Wette.
+ *
+ * A modified algorithm for LogGamma distributions.
+ *
+ * Reference:
+ * <p>
+ * Maximum likelihood estimation of the parameters of the gamma distribution and
+ * their bias<br />
+ * S. C. Choi, R. Wette<br />
+ * in: Technometrics
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has LogGammaDistribution - - estimates
+ */
+@Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892")
+public class LogGammaChoiWetteEstimator implements DistributionEstimator<LogGammaDistribution> {
+  /**
+   * Static estimation, using iterative refinement.
+   */
+  public static final LogGammaChoiWetteEstimator STATIC = new LogGammaChoiWetteEstimator();
+
+  /**
+   * Upper bound on Newton refinement steps, so that a non-converging
+   * iteration cannot loop forever.
+   */
+  private static final int MAX_ITERATIONS = 100;
+
+  /**
+   * Private constructor.
+   */
+  private LogGammaChoiWetteEstimator() {
+    // Do not instantiate - use static class
+  }
+
+  /**
+   * Estimate the parameters from the logarithms of the shifted data.
+   *
+   * Values that are non-positive, infinite or NaN after shifting are ignored.
+   *
+   * @param data Data container
+   * @param adapter Adapter to read numbers from the container
+   * @return Estimated distribution
+   * @throws ArithmeticException if the mean of the log values is not
+   *         positive, or the estimated parameters are not positive.
+   */
+  @Override
+  public <A> LogGammaDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
+    final int len = adapter.size(data);
+    // NOTE(review): presumably an offset that makes the shifted data
+    // positive - confirm against AbstractLogMOMEstimator.min.
+    final double shift = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10);
+    double meanx = 0, meanlogx = 0;
+    // Number of values that entered the running means. The loop index must
+    // not be used for this, because skipped values would bias both means.
+    int count = 0;
+    for (int i = 0; i < len; i++) {
+      double val = adapter.getDouble(data, i) - shift;
+      if (val <= 0 || Double.isInfinite(val) || Double.isNaN(val)) {
+        continue;
+      }
+      val = Math.log(val);
+      // Where the second logarithm is undefined, substitute the current mean,
+      // which leaves the mean of the logarithms unchanged.
+      final double logx = (val > 0) ? Math.log(val) : meanlogx;
+      ++count;
+      meanx += (val - meanx) / count;
+      meanlogx += (logx - meanlogx) / count;
+    }
+    if (!(meanx > 0)) {
+      throw new ArithmeticException("Cannot estimate LogGamma distribution with mean " + meanx);
+    }
+    // Initial approximation
+    final double logmeanx = Math.log(meanx);
+    final double diff = logmeanx - meanlogx;
+    double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff);
+
+    // Refine via newton iteration, based on Choi and Wette equation
+    for (int it = 0; it < MAX_ITERATIONS; it++) {
+      final double kdelta = (Math.log(k) - GammaDistribution.digamma(k) - diff) / (1 / k - GammaDistribution.trigamma(k));
+      // Stop on convergence, or when the step is infinite or NaN.
+      if (Math.abs(kdelta) / k < 1E-8 || !(kdelta < Double.POSITIVE_INFINITY) || !(kdelta > Double.NEGATIVE_INFINITY)) {
+        break;
+      }
+      k += kdelta;
+    }
+    // Estimate theta:
+    final double theta = k / meanx;
+    if (!(k > 0.0) || !(theta > 0.0)) {
+      throw new ArithmeticException("LogGamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta);
+    }
+    return new LogGammaDistribution(k, theta, shift - 1);
+  }
+
+  @Override
+  public Class<? super LogGammaDistribution> getDistributionClass() {
+    return LogGammaDistribution.class;
+  }
+
+  @Override
+  public String toString() {
+    return this.getClass().getSimpleName();
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected LogGammaChoiWetteEstimator makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMADEstimator.java
new file mode 100644
index 00000000..ed34870c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMADEstimator.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaDistribution;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Robust parameter estimation for the LogGamma distribution.
+ *
+ * A modified algorithm for LogGamma distributions.
+ * + * @author Erich Schubert + * + * @apiviz.has LogGammaDistribution - - estimates + */ +public class LogGammaLogMADEstimator extends AbstractLogMADEstimator<LogGammaDistribution> { + /** + * Static estimator, more robust to outliers by using the median. + */ + public static final LogGammaLogMADEstimator STATIC = new LogGammaLogMADEstimator(); + + /** + * Private constructor. + */ + private LogGammaLogMADEstimator() { + // Do not instantiate - use static class + } + + @Override + public LogGammaDistribution estimateFromLogMedianMAD(double median, double mad, double shift) { + if (median < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero median."); + } + if (mad < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero MAD."); + } + + final double theta = median / (mad * mad); + final double k = median * theta; + if (!(k > 0.) || !(theta > 0.)) { + throw new ArithmeticException("LogGamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new LogGammaDistribution(k, theta, shift - 1); + } + + @Override + public Class<? super LogGammaDistribution> getDistributionClass() { + return LogGammaDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogGammaLogMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMOMEstimator.java new file mode 100644 index 00000000..ddc6cbb9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMOMEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Simple parameter estimation for the LogGamma distribution. + * + * This is a very naive estimation, based on the mean and variance of the log + * transformed values. 
+ * + * @author Erich Schubert + * + * @apiviz.has LogGammaDistribution - - estimates + */ +public class LogGammaLogMOMEstimator extends AbstractLogMeanVarianceEstimator<LogGammaDistribution> { + /** + * Static estimation using just the mean and variance. + */ + public static final LogGammaLogMOMEstimator STATIC = new LogGammaLogMOMEstimator(); + + /** + * Private constructor: use static instance. + */ + private LogGammaLogMOMEstimator() { + // Do not instantiate - use static class + } + + @Override + public LogGammaDistribution estimateFromLogMeanVariance(MeanVariance mv, double shift) { + final double mu = mv.getMean(); + final double var = mv.getSampleVariance(); + if (mu < Double.MIN_NORMAL || var < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero mean or variance: " + mv.toString()); + } + final double theta = mu / var; + final double k = mu * theta; + if (!(k > 0.) || !(theta > 0.)) { + throw new ArithmeticException("LogGamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new LogGammaDistribution(k, theta, shift - 1); + } + + @Override + public Class<? super LogGammaDistribution> getDistributionClass() { + return LogGammaDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogGammaLogMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogLogisticMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogLogisticMADEstimator.java new file mode 100644 index 00000000..61b111c9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogLogisticMADEstimator.java @@ -0,0 +1,81 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogLogisticDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate LogLogistic distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogLogisticDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class LogLogisticMADEstimator extends AbstractMADEstimator<LogLogisticDistribution> { + /** + * Static instance. + */ + public static final LogLogisticMADEstimator STATIC = new LogLogisticMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private LogLogisticMADEstimator() { + // Do not instantiate + } + + @Override + public LogLogisticDistribution estimateFromMedianMAD(double median, double mad) { + return new LogLogisticDistribution(1. / median, MathUtil.LOG3 / mad); + } + + @Override + public Class<? super LogLogisticDistribution> getDistributionClass() { + return LogLogisticDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogLogisticMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMADDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMADDistributionEstimator.java new file mode 100644 index 00000000..9c281952 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMADDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribution estimators that use the median and the median absolute + * deviation (MAD) in logspace. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated. 
+ */ +public interface LogMADDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * General form of the parameter estimation. + * + * @param median Median of log values. + * @param mad Median absolute deviation from median (in logspace). + * @param shift Shift offset that was used to avoid negative values. + * @return Estimated distribution + */ + D estimateFromLogMedianMAD(double median, double mad, double shift); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMOMDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMOMDistributionEstimator.java new file mode 100644 index 00000000..5a589faa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMOMDistributionEstimator.java @@ -0,0 +1,46 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribution estimators that use the method of moments (MOM) in logspace, + * i.e. that only need the statistical moments of a data set after taking + * logarithms. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated. + */ +public interface LogMOMDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * General form of the parameter estimation. + * + * @param moments Statistical moments (in logspace) + * @param shift Shifting offset that was used + * @return Estimated distribution + */ + D estimateFromLogStatisticalMoments(StatisticalMoments moments, double shift); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalBilkovaLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalBilkovaLMMEstimator.java new file mode 100644 index 00000000..e8fab89f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalBilkovaLMMEstimator.java @@ -0,0 +1,104 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Alternate estimate of the parameters of a log Normal Distribution, using the + * methods of L-Moments (LMM) for the Generalized Normal Distribution. + * + * Reference: + * <p> + * D. Bílková<br /> + * Lognormal distribution and using L-moment method for estimating its + * parameters<br /> + * Int. Journal of Mathematical Models and Methods in Applied Sciences (NAUN) + * </p> + * + * See also {@link LogNormalLMMEstimator} for a similar estimator, based on the + * generalized normal distribution, as used by Hosking. + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution + */ +@Reference(authors = "D. Bílková", title = "Lognormal distribution and using L-moment method for estimating its parameters", booktitle = "Int. Journal of Mathematical Models and Methods in Applied Sciences (NAUN)", url = "http://www.naun.org/multimedia/NAUN/m3as/17-079.pdf") +public class LogNormalBilkovaLMMEstimator extends AbstractLMMEstimator<LogNormalDistribution> { + /** + * Static instance. + */ + public static final LogNormalBilkovaLMMEstimator STATIC = new LogNormalBilkovaLMMEstimator(); + + /** + * Scaling constant: sqrt(8/3). + */ + private static final double SQRT8_3 = Math.sqrt(8. / 3.); + + /** + * Constructor. Private: use static instance. + */ + private LogNormalBilkovaLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + @Override + public LogNormalDistribution estimateFromLMoments(double[] xmom) { + if (!(xmom[1] > 0.) 
|| !(Math.abs(xmom[2]) < 1.0) || !(xmom[2] > 0.)) { + throw new ArithmeticException("L-Moments invalid"); + } + final double z = SQRT8_3 * NormalDistribution.standardNormalQuantile(.5 * (1. + xmom[2])), z2 = z * z; + final double sigma = 0.999281 * z - 0.006118 * z * z2 + 0.000127 * z * z2 * z2; + final double sigmasqhalf = sigma * sigma * .5; + final double logmu = Math.log(xmom[1] / NormalDistribution.erf(.5 * sigma)) - sigmasqhalf; + return new LogNormalDistribution(logmu, Math.max(sigma, Double.MIN_NORMAL), xmom[0] - Math.exp(logmu + sigmasqhalf)); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalBilkovaLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLMMEstimator.java new file mode 100644 index 00000000..48865d3f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLMMEstimator.java @@ -0,0 +1,130 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a log Normal Distribution, using the methods of + * L-Moments (LMM) for the Generalized Normal Distribution. + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class LogNormalLMMEstimator extends AbstractLMMEstimator<LogNormalDistribution> { + /** + * Static instance. + */ + public static final LogNormalLMMEstimator STATIC = new LogNormalLMMEstimator(); + + /** Polynomial approximation */ + private static final double // + A0 = 0.20466534e+01, // + A1 = -0.36544371e+01, // + A2 = 0.18396733e+01, // + A3 = -0.20360244; + + /** Polynomial approximation */ + private static final double // + B1 = -0.20182173e+01, // + B2 = 0.12420401e+01, // + B3 = -0.21741801; + + /** + * Constructor. Private: use static instance. 
+ */ + private LogNormalLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + /** + * Estimate a log-normal distribution from three L-moments, going through the + * polynomial approximation for the generalized normal distribution. + * + * @param xmom L-moments; xmom[0], xmom[1] and xmom[2] are read, the last one + * as skewness term t3 + * @return Estimated log-normal distribution + * @throws ArithmeticException when the L-moments are rejected as invalid, or + * when t3 is too close to 0 or to 1 to derive finite parameters + */ + @Override + public LogNormalDistribution estimateFromLMoments(double[] xmom) { + // Note: the third condition probably is okay for Generalized Normal, but + // not for lognormal estimation. + if (!(xmom[1] > 0.) || !(Math.abs(xmom[2]) < 1.0) || !(xmom[2] > 0.0)) { + throw new ArithmeticException("L-Moments invalid"); + } + // The check above guarantees 0 < t3 < 1. + final double t3 = xmom[2]; + if (t3 >= .95) { + // Extreme skewness: fail explicitly. Continuing with the placeholder + // values scale = -1 and shape = 0 divided by a zero sigma below and + // produced infinite distribution parameters. + throw new ArithmeticException("L-Moments invalid: skewness too extreme for lognormal estimation: " + t3); + } + if (t3 < 1e-8) { + // t3 effectively zero: shape (and thus sigma) would be 0, so that + // scale / sigma is infinite and Math.log(expmu) below became NaN. + throw new ArithmeticException("L-Moments invalid: skewness too close to zero for lognormal estimation: " + t3); + } + // Generalized Normal Distribution estimation: + final double tt = t3 * t3; + final double shape = -t3 * (A0 + tt * (A1 + tt * (A2 + tt * A3))) / (1. + tt * (B1 + tt * (B2 + tt * B3))); + final double e = Math.exp(.5 * shape * shape); + final double scale = xmom[1] * shape / (e * NormalDistribution.erf(.5 * shape)); + final double location = xmom[0] + scale * (e - 1.) / shape; + // Estimate logNormal from generalized normal: + final double sigma = -shape; + final double expmu = scale / sigma; + return new LogNormalDistribution(Math.log(expmu), Math.max(sigma, Double.MIN_NORMAL), location - expmu); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLevenbergMarquardtKDEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLevenbergMarquardtKDEEstimator.java new file mode 100644 index 00000000..b4b8ff0d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLevenbergMarquardtKDEEstimator.java @@ -0,0 +1,122 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.GaussianFittingFunction; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod; +import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Distribution parameter estimation using Levenberg-Marquardt iterative + * optimization and a kernel density estimation. + * + * Note: this estimator is rather expensive, and needs optimization in the KDE + * phase, which currently is O(n^2)! + * + * This estimator is primarily attractive when only part of the distribution was + * observed. + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution - - estimates + */ +public class LogNormalLevenbergMarquardtKDEEstimator implements DistributionEstimator<LogNormalDistribution> { + /** + * Static estimator for small sample sizes and <em>partial</em> data. + */ + public static final LogNormalLevenbergMarquardtKDEEstimator STATIC = new LogNormalLevenbergMarquardtKDEEstimator(); + + /** + * Constructor. Private: use static instance! 
+ */ + private LogNormalLevenbergMarquardtKDEEstimator() { + super(); + } + + /** + * Estimate a log-normal distribution by fitting a Gaussian function with + * Levenberg-Marquardt to a kernel density estimate of the log-transformed + * values. + * + * @param data Data container, accessed through the adapter + * @param adapter Array adapter used to read size and values + * @return Log-normal distribution built from the two fitted parameters, with + * shift 0 + * @throws ArithmeticException when the data is empty or contains a + * non-positive value + */ + @Override + public <A> LogNormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + final int len = adapter.size(data); + if (len < 1) { + // Without this check, the median lookup below fails with an + // ArrayIndexOutOfBoundsException. + throw new ArithmeticException("Cannot fit logNormal to an empty data set."); + } + MeanVariance mv = new MeanVariance(); + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (!(val > 0)) { + throw new ArithmeticException("Cannot fit logNormal to a data set which includes non-positive values: " + val); + } + x[i] = Math.log(val); + mv.put(x[i]); + } + // Sort our copy. + Arrays.sort(x); + // Median of the sorted values: both indexes coincide for odd len, and are + // the two central elements for even len. They never exceed len - 1. + double median = (x[(len - 1) >> 1] + x[len >> 1]) * .5; + + // Height = density, via KDE. + KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6); + double[] y = de.getDensity(); + + // Weights: + double[] s = new double[len]; + Arrays.fill(s, 1.0); + + // Initial parameter estimate: + double[] params = { median, mv.getSampleStddev(), 1 }; + boolean[] dofit = { true, true, false }; + LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s); + fit.run(); + double[] ps = fit.getParams(); + return new LogNormalDistribution(ps[0], ps[1], 0.); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLevenbergMarquardtKDEEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMADEstimator.java new file mode 100644 index 00000000..6ad1dc33 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMADEstimator.java @@ -0,0 +1,89 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimator using Medians. 
More robust to outliers, and just slightly more + * expensive (needs to copy the data for partial sorting to find the median). + * + * References: + * <p> + * F. R. Hampel<br /> + * The Influence Curve and Its Role in Robust Estimation<br /> + * in: Journal of the American Statistical Association, June 1974, Vol. 69, No. + * 346 + * </p> + * <p> + * P. J. Rousseeuw, C. Croux<br /> + * Alternatives to the Median Absolute Deviation<br /> + * in: Journal of the American Statistical Association, December 1993, Vol. 88, + * No. 424, Theory and Methods + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution - - estimates + */ +@Reference(authors = "F. R. Hampel", title = "The Influence Curve and Its Role in Robust Estimation", booktitle = "Journal of the American Statistical Association, June 1974, Vol. 69, No. 346", url = "http://www.jstor.org/stable/10.2307/2285666") +public class LogNormalLogMADEstimator extends AbstractLogMADEstimator<LogNormalDistribution> { + /** + * Static estimator, more robust to outliers by using the median. + * + * Final, so that the shared instance cannot be replaced. + */ + public static final LogNormalLogMADEstimator STATIC = new LogNormalLogMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private LogNormalLogMADEstimator() { + super(); + } + + /** + * Build the distribution from the median and MAD that are passed in. + * + * The median is passed on unchanged as first parameter; the second parameter + * is the MAD scaled by {@code NormalDistribution.ONEBYPHIINV075}, but at + * least {@code Double.MIN_NORMAL}. + * + * @param median Median value + * @param mad Median absolute deviation + * @param shift Shift offset, passed on to the distribution + * @return Estimated distribution + */ + @Override + public LogNormalDistribution estimateFromLogMedianMAD(double median, double mad, double shift) { + return new LogNormalDistribution(median, Math.max(NormalDistribution.ONEBYPHIINV075 * mad, Double.MIN_NORMAL), shift); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLogMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMOMEstimator.java new file mode 100644 index 00000000..5b753c54 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMOMEstimator.java @@ -0,0 +1,74 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Naive distribution estimation using mean and sample variance. + * + * This is a maximum-likelihood-estimator (MLE). 
+ * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution - - estimates + */ +public class LogNormalLogMOMEstimator extends AbstractLogMeanVarianceEstimator<LogNormalDistribution> { + /** + * Static estimator, using mean and variance. + * + * Final, so that the shared instance cannot be replaced. + */ + public static final LogNormalLogMOMEstimator STATIC = new LogNormalLogMOMEstimator(); + + /** + * Private constructor, use static instance! + */ + private LogNormalLogMOMEstimator() { + super(); + } + + /** + * Build the distribution from the mean and sample standard deviation. + * + * The standard deviation is clamped to at least {@code Double.MIN_NORMAL}. + * + * @param mv Mean and variance to estimate from + * @param shift Shift offset, passed on to the distribution + * @return Estimated distribution + */ + @Override + public LogNormalDistribution estimateFromLogMeanVariance(MeanVariance mv, double shift) { + return new LogNormalDistribution(mv.getMean(), Math.max(mv.getSampleStddev(), Double.MIN_NORMAL), shift); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLogMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticLMMEstimator.java new file mode 100644 index 00000000..973a91de --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticLMMEstimator.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogisticDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a Logistic Distribution, using the methods of + * L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogisticDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class LogisticLMMEstimator extends AbstractLMMEstimator<LogisticDistribution> { + /** + * Static instance. + */ + public static final LogisticLMMEstimator STATIC = new LogisticLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private LogisticLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public LogisticDistribution estimateFromLMoments(double[] xmom) { + // The original publication would also estimate a shape, but we don't have + // the generalized logistic distribution yet. + // So we continue as if the Type II shape is 0, fairly trivial: + return new LogisticDistribution(xmom[0], xmom[1]); + } + + @Override + public Class<? super LogisticDistribution> getDistributionClass() { + return LogisticDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogisticLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticMADEstimator.java new file mode 100644 index 00000000..45181486 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticMADEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogisticDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Logistic distribution parameters using Median and MAD. + * + * Reference: + * <p> + * Robust Estimators for Transformed Location Scale Families<br /> + * D. 
J. Olive + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogisticDistribution + */ +@Reference(title = "Robust Estimators for Transformed Location Scale Families", authors = "D. J. Olive", booktitle = "") +public class LogisticMADEstimator extends AbstractMADEstimator<LogisticDistribution> { + /** + * Static instance. + */ + public static final LogisticMADEstimator STATIC = new LogisticMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private LogisticMADEstimator() { + // Do not instantiate + } + + @Override + public LogisticDistribution estimateFromMedianMAD(double median, double mad) { + return new LogisticDistribution(median, mad / MathUtil.LOG3); + } + + @Override + public Class<? super LogisticDistribution> getDistributionClass() { + return LogisticDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogisticMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MADDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MADDistributionEstimator.java new file mode 100644 index 00000000..6bf2b3ae --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MADDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + 
/**
 * Distribution estimators that work on the median and the median absolute
 * deviation from the median (MAD) of a data set.
 *
 * @author Erich Schubert
 *
 * @param <D> Distribution estimated.
 */
public interface MADDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> {
  /**
   * General form of the parameter estimation: build the distribution from the
   * median and the MAD.
   *
   * @param median Median value
   * @param mad Median absolute deviation from median
   * @return Estimated distribution
   */
  D estimateFromMedianMAD(double median, double mad);
}
/**
 * Distribution estimators that use the method of moments (MOM), i.e. that only
 * need the statistical moments of a data set.
 *
 * @author Erich Schubert
 *
 * @param <D> Distribution estimated.
 */
public interface MOMDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> {
  /**
   * General form of the parameter estimation: build the distribution from the
   * statistical moments.
   *
   * @param moments Statistical moments
   * @return Estimated distribution
   */
  D estimateFromStatisticalMoments(StatisticalMoments moments);
}
/**
 * Interface for estimators that only need mean and variance.
 *
 * As this interface extends {@link MOMDistributionEstimator}, implementations
 * can implicitly also handle full statistical moments.
 *
 * @author Erich Schubert
 *
 * @param <D> Distribution type
 */
public interface MeanVarianceDistributionEstimator<D extends Distribution> extends MOMDistributionEstimator<D> {
  /**
   * Estimate the distribution from mean and variance.
   *
   * @param mv Mean and variance.
   * @return Distribution
   */
  D estimateFromMeanVariance(MeanVariance mv);
}
+ * </p> + * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class NormalLMMEstimator extends AbstractLMMEstimator<NormalDistribution> { + /** + * Static instance + */ + public static final NormalLMMEstimator STATIC = new NormalLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private NormalLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public NormalDistribution estimateFromLMoments(double[] xmom) { + return new NormalDistribution(xmom[0], Math.max(xmom[1] * MathUtil.SQRTPI, Double.MIN_NORMAL)); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLevenbergMarquardtKDEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLevenbergMarquardtKDEEstimator.java new file mode 100644 index 00000000..bbbcda76 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLevenbergMarquardtKDEEstimator.java @@ -0,0 +1,118 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU 
Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.GaussianFittingFunction; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod; +import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Distribution parameter estimation using Levenberg-Marquardt iterative + * optimization and a kernel density estimation. + * + * Note: this estimator is rather expensive, and needs optimization in the KDE + * phase, which currently is O(n^2)! + * + * This estimator is primarily attractive when only part of the distribution was + * observed. + * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution - - estimates + */ +public class NormalLevenbergMarquardtKDEEstimator implements DistributionEstimator<NormalDistribution> { + /** + * Static estimator for small sample sizes and <em>partial</em> data. + */ + public static final NormalLevenbergMarquardtKDEEstimator STATIC = new NormalLevenbergMarquardtKDEEstimator(); + + /** + * Constructor. Private: use static instance. 
+ */ + private NormalLevenbergMarquardtKDEEstimator() { + super(); + } + + @Override + public <A> NormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + final int len = adapter.size(data); + MeanVariance mv = new MeanVariance(); + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + x[i] = adapter.getDouble(data, i); + mv.put(x[i]); + } + // Sort our copy. + Arrays.sort(x); + double median = (x[len >> 1] + x[(len + 1) >> 1]) * .5; + + // Height = density, via KDE. + KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6); + double[] y = de.getDensity(); + + // Weights: + double[] s = new double[len]; + Arrays.fill(s, 1.0); + + // Initial parameter estimate: + double[] params = { median, mv.getSampleStddev(), 1 }; + boolean[] dofit = { true, true, false }; + LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s); + fit.run(); + double[] ps = fit.getParams(); + return new NormalDistribution(ps[0], ps[1]); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalLevenbergMarquardtKDEEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMADEstimator.java new file mode 100644 index 00000000..2221ad4b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMADEstimator.java @@ -0,0 +1,88 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimator using Medians. More robust to outliers, and just slightly more + * expensive (needs to copy the data for partial sorting to find the median). + * + * References: + * <p> + * F. R. 
Hampel<br /> + * The Influence Curve and Its Role in Robust Estimation<br /> + * in: Journal of the American Statistical Association, June 1974, Vol. 69, No. + * 346 + * </p> + * <p> + * P. J. Rousseeuw, C. Croux<br /> + * Alternatives to the Median Absolute Deviation<br /> + * in: Journal of the American Statistical Association, December 1993, Vol. 88, + * No. 424, Theory and Methods + * </p> + * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution - - estimates + */ +@Reference(authors = "F. R. Hampel", title = "The Influence Curve and Its Role in Robust Estimation", booktitle = "Journal of the American Statistical Association, June 1974, Vol. 69, No. 346", url = "http://www.jstor.org/stable/10.2307/2285666") +public class NormalMADEstimator extends AbstractMADEstimator<NormalDistribution> { + /** + * Static estimator, more robust to outliers by using the median. + */ + public static NormalMADEstimator STATIC = new NormalMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private NormalMADEstimator() { + super(); + } + + @Override + public NormalDistribution estimateFromMedianMAD(double median, double mad) { + return new NormalDistribution(median, Math.max(NormalDistribution.ONEBYPHIINV075 * mad, Double.MIN_NORMAL)); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMOMEstimator.java new file mode 100644 index 00000000..dae05eb0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMOMEstimator.java @@ -0,0 +1,76 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Naive maximum-likelihood estimations for the normal distribution using mean + * and sample variance. + * + * While this is the most commonly used estimator, it is not very robust against + * extreme values. 
+ * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution - - estimates + */ +public class NormalMOMEstimator extends AbstractMeanVarianceEstimator<NormalDistribution> { + /** + * Static estimator, using mean and variance. + */ + public static NormalMOMEstimator STATIC = new NormalMOMEstimator(); + + /** + * Private constructor, use static instance! + */ + private NormalMOMEstimator() { + // Do not instantiate + } + + @Override + public NormalDistribution estimateFromMeanVariance(MeanVariance mv) { + return new NormalDistribution(mv.getMean(), Math.max(mv.getSampleStddev(), Double.MIN_NORMAL)); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighLMMEstimator.java new file mode 100644 index 00000000..a827e1e8 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighLMMEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.RayleighDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the scale parameter of a (non-shifted) RayleighDistribution using + * the method of L-Moments (LMM). + * + * @author Erich Schubert + * + * @apiviz.has RayleighDistribution - - estimates + */ +public class RayleighLMMEstimator extends AbstractLMMEstimator<RayleighDistribution> { + /** + * Static instance. + */ + public static final RayleighLMMEstimator STATIC = new RayleighLMMEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private RayleighLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public RayleighDistribution estimateFromLMoments(double[] xmom) { + double sigma = 2. * xmom[1] / (MathUtil.SQRTPI * (MathUtil.SQRT2 - 1.)); + double mu = xmom[0] - sigma * MathUtil.SQRTHALFPI; + return new RayleighDistribution(mu, sigma); + } + + @Override + public Class<? super RayleighDistribution> getDistributionClass() { + return RayleighDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RayleighLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMADEstimator.java new file mode 100644 index 00000000..7382ada2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMADEstimator.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.RayleighDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a RayleighDistribution using the MAD. + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has RayleighDistribution - - estimates + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url = "http://lagrange.math.siu.edu/Olive/preprints.htm") +public class RayleighMADEstimator extends AbstractMADEstimator<RayleighDistribution> { + /** + * Static instance. + */ + public static final RayleighMADEstimator STATIC = new RayleighMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private RayleighMADEstimator() { + super(); + } + + /** + * See reference for the derivation of this constants. + */ + private static final double F1 = 1. / 0.448453, F2 = 1.17741 * F1; + + @Override + public RayleighDistribution estimateFromMedianMAD(double median, double mad) { + return new RayleighDistribution(median - F2 * mad, F1 * mad); + } + + @Override + public Class<? super RayleighDistribution> getDistributionClass() { + return RayleighDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RayleighMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMLEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMLEEstimator.java new file mode 100644 index 00000000..aa5dc300 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMLEEstimator.java @@ -0,0 +1,84 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.RayleighDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the scale parameter of a (non-shifted) RayleighDistribution using a + * maximum likelihood estimate. 
+ * + * @author Erich Schubert + * + * @apiviz.has RayleighDistribution - - estimates + */ +public class RayleighMLEEstimator implements DistributionEstimator<RayleighDistribution> { + /** + * Static instance. + */ + public static final RayleighMLEEstimator STATIC = new RayleighMLEEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private RayleighMLEEstimator() { + super(); + } + + @Override + public <A> RayleighDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double sumsq = 0.; + for(int i = 0; i < len; i++) { + double v = adapter.getDouble(data, i); + sumsq += v * v; + } + return new RayleighDistribution(Math.sqrt(.5 * sumsq / len)); + } + + @Override + public Class<? super RayleighDistribution> getDistributionClass() { + return RayleighDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RayleighMLEEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/SkewGNormalLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/SkewGNormalLMMEstimator.java new file mode 100644 index 00000000..df05eef9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/SkewGNormalLMMEstimator.java @@ -0,0 +1,125 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU 
Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.SkewGeneralizedNormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a skew Normal Distribution (Hoskin's Generalized + * Normal Distribution), using the methods of L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has SkewGeneralizedNormalDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class SkewGNormalLMMEstimator extends AbstractLMMEstimator<SkewGeneralizedNormalDistribution> { + /** + * Static instance. 
+ */ + public static final SkewGNormalLMMEstimator STATIC = new SkewGNormalLMMEstimator(); + + /** Polynomial approximation */ + private static final double // + A0 = 0.20466534e+01, // + A1 = -0.36544371e+01, // + A2 = 0.18396733e+01, // + A3 = -0.20360244; + + /** Polynomial approximation */ + private static final double // + B1 = -0.20182173e+01, // + B2 = 0.12420401e+01, // + B3 = -0.21741801; + + /** + * Constructor. Private: use static instance. + */ + private SkewGNormalLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + @Override + public SkewGeneralizedNormalDistribution estimateFromLMoments(double[] xmom) { + if (!(xmom[1] > 0.) || !(Math.abs(xmom[2]) < 1.0)) { + throw new ArithmeticException("L-Moments invalid"); + } + // Generalized Normal Distribution estimation: + double t3 = xmom[2]; + final double location, scale, shape; + if (Math.abs(t3) >= .95) { + // Extreme skewness + location = 0.; + scale = -1.; + shape = 0.; + } else if (Math.abs(t3) <= 1e-8) { + // t3 effectively zero. + location = xmom[0]; + scale = xmom[1] * MathUtil.SQRTPI; + shape = 0.; + } else { + final double tt = t3 * t3; + shape = -t3 * (A0 + tt * (A1 + tt * (A2 + tt * A3))) / (1. + tt * (B1 + tt * (B2 + tt * B3))); + final double e = Math.exp(.5 * shape * shape); + scale = xmom[1] * shape / (e * NormalDistribution.erf(.5 * shape)); + location = xmom[0] + scale * (e - 1.) / shape; + } + return new SkewGeneralizedNormalDistribution(location, scale, shape); + } + + @Override + public Class<? super SkewGeneralizedNormalDistribution> getDistributionClass() { + return SkewGeneralizedNormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected SkewGNormalLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformEnhancedMinMaxEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformEnhancedMinMaxEstimator.java new file mode 100644 index 00000000..834b0d94 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformEnhancedMinMaxEstimator.java @@ -0,0 +1,100 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Slightly improved estimation, that takes sample size into account and + * enhances the interval appropriately. + * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution - - estimates + */ +public class UniformEnhancedMinMaxEstimator implements DistributionEstimator<UniformDistribution> { + /** + * Slightly more refined estimator: takes sample size into account. + */ + public static final UniformEnhancedMinMaxEstimator STATIC = new UniformEnhancedMinMaxEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private UniformEnhancedMinMaxEstimator() { + super(); + } + + @Override + public <A> UniformDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + DoubleMinMax mm = new DoubleMinMax(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (val > Double.NEGATIVE_INFINITY && val < Double.POSITIVE_INFINITY) { + mm.put(val); + } + } + return estimate(mm.getMin(), mm.getMax(), len); + } + + /** + * Estimate from simple characteristics. + * + * @param min Minimum + * @param max Maximum + * @param count Number of observations + * @return Distribution + */ + public UniformDistribution estimate(double min, double max, final int count) { + double grow = (count > 1) ? 0.5 * (max - min) / (count - 1) : 0.; + return new UniformDistribution(Math.max(min - grow, -Double.MAX_VALUE), Math.min(max + grow, Double.MAX_VALUE)); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformEnhancedMinMaxEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformLMMEstimator.java new file mode 100644 index 00000000..5b3e868f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformLMMEstimator.java @@ -0,0 +1,77 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a normal distribution using the method of + * L-Moments (LMM). 
+ * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution + */ +public class UniformLMMEstimator extends AbstractLMMEstimator<UniformDistribution> { + /** + * Static instance + */ + public static final UniformLMMEstimator STATIC = new UniformLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private UniformLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public UniformDistribution estimateFromLMoments(double[] xmom) { + return new UniformDistribution(Math.max(xmom[0] - 3 * xmom[1], -Double.MAX_VALUE), Math.min(xmom[0] + 3 * xmom[1], Double.MAX_VALUE)); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMADEstimator.java new file mode 100644 index 00000000..47dad134 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMADEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Uniform distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class UniformMADEstimator extends AbstractMADEstimator<UniformDistribution> { + /** + * Static instance. + */ + public static final UniformMADEstimator STATIC = new UniformMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private UniformMADEstimator() { + // Do not instantiate + } + + @Override + public UniformDistribution estimateFromMedianMAD(double median, double mad) { + return new UniformDistribution(Math.max(median - 2 * mad, -Double.MAX_VALUE), Math.min(median + 2 * mad, Double.MAX_VALUE)); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java new file mode 100644 index 00000000..e9870884 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java @@ -0,0 +1,107 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the uniform distribution by computing min and max. 
+ * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution - - estimates + */ +public class UniformMinMaxEstimator implements DistributionEstimator<UniformDistribution> { + /** + * The most naive estimator possible: uses minimum and maximum. + */ + public static final UniformMinMaxEstimator STATIC = new UniformMinMaxEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private UniformMinMaxEstimator() { + super(); + } + + @Override + public <A> UniformDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + DoubleMinMax mm = new DoubleMinMax(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (val > Double.NEGATIVE_INFINITY && val < Double.POSITIVE_INFINITY) { + mm.put(val); + } + } + return estimate(mm); + } + + /** + * Estimate parameters from minimum and maximum observed. + * + * @param mm Minimum and Maximum + * @return Estimation + */ + public UniformDistribution estimate(DoubleMinMax mm) { + return new UniformDistribution(Math.max(mm.getMin(), -Double.MAX_VALUE), Math.min(mm.getMax(), Double.MAX_VALUE)); + } + + /** + * Estimate parameters from minimum and maximum observed. + * + * @param mm Minimum and Maximum + * @return Estimation + */ + public Distribution estimate(double min, double max) { + return new UniformDistribution(min, max); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformMinMaxEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMLEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMLEstimator.java new file mode 100644 index 00000000..16a33f89 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMLEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WaldDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate parameter of the Wald distribution. 
+ * + * @author Erich Schubert + * + * @apiviz.has WaldDistribution + */ +public class WaldMLEstimator implements DistributionEstimator<WaldDistribution> { + /** + * Static instance. + */ + public static final WaldMLEstimator STATIC = new WaldMLEstimator(); + + @Override + public <A> WaldDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double mean = 0.; + for(int i = 0; i < len; i++) { + double v = adapter.getDouble(data, i); + mean += v; + } + mean /= len; + double invmean = 1. / mean; + double invdev = 0.; + for(int i = 0; i < len; i++) { + double v = adapter.getDouble(data, i); + if(v > 0.) { + invdev += 1. / v - invmean; + } + } + return new WaldDistribution(mean, len / invdev); + } + + @Override + public Class<? super WaldDistribution> getDistributionClass() { + return WaldDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WaldMLEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMOMEstimator.java new file mode 100644 index 00000000..82b70936 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMOMEstimator.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General 
Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WaldDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate parameter of the Wald distribution. + * + * @author Erich Schubert + * + * @apiviz.has WaldDistribution + */ +public class WaldMOMEstimator extends AbstractMeanVarianceEstimator<WaldDistribution> { + /** + * Static instance. + */ + public static final WaldMOMEstimator STATIC = new WaldMOMEstimator(); + + @Override + public WaldDistribution estimateFromMeanVariance(MeanVariance mv) { + double mean = mv.getMean(); + return new WaldDistribution(mean, mean * mean * mean / mv.getSampleVariance()); + } + + @Override + public Class<? super WaldDistribution> getDistributionClass() { + return WaldDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WaldMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLMMEstimator.java new file mode 100644 index 00000000..9d7d8e8e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLMMEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WeibullDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate parameters of the Weibull distribution using the method of L-Moments + * (LMM). 
+ * + * @author Erich Schubert + * + * @apiviz.has WeibullDistribution + */ +public class WeibullLMMEstimator extends AbstractLMMEstimator<WeibullDistribution> { + /** + * Static instance. + */ + public static final WeibullLMMEstimator STATIC = new WeibullLMMEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private WeibullLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + @Override + public WeibullDistribution estimateFromLMoments(double[] xmom) { + double l = xmom[2], l2 = l * l, l3 = l2 * l, l4 = l3 * l, l5 = l4 * l, l6 = l5 * l; + double k = 285.3 * l6 - 658.6 * l5 + 622.8 * l4 - 317.2 * l3 + 98.52 * l2 - 21.256 * l + 3.516; + + double gam = GammaDistribution.gamma(1. + 1. / k); + double lambda = xmom[1] / (1. - Math.pow(2., -1. / k) * gam); + double mu = xmom[0] - lambda * gam; + + return new WeibullDistribution(k, lambda, mu); + } + + @Override + public Class<? super WeibullDistribution> getDistributionClass() { + return WeibullDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WeibullLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMADEstimator.java new file mode 100644 index 00000000..aacceae7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMADEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WeibullDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Parameter estimation via median and median absolute deviation from median + * (MAD). + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has WeibullDistribution - - estimates + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class WeibullLogMADEstimator extends AbstractLogMADEstimator<WeibullDistribution> { + /** + * The more robust median based estimator. + */ + public static final WeibullLogMADEstimator STATIC = new WeibullLogMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private WeibullLogMADEstimator() { + super(); + } + + @Override + public WeibullDistribution estimateFromLogMedianMAD(double median, double mad, double shift) { + double isigma = 1.30370 / mad; + double lambda = Math.exp(isigma * median - MathUtil.LOGLOG2); + + return new WeibullDistribution(isigma, lambda); + } + + @Override + public Class<? super WeibullDistribution> getDistributionClass() { + return WeibullDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WeibullLogMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMOMEstimator.java new file mode 100644 index 00000000..9182a7ce --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMOMEstimator.java @@ -0,0 +1,101 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WeibullDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Naive parameter estimation via least squares. + * + * TODO: this doesn't seem to work very well yet. Buggy? 
+ * + * TODO: the naming is misleading: while it uses some method of moments, it + * doesn't use "the" statistical moments. + * + * @author Erich Schubert + * + * @apiviz.has WeibullDistribution - - estimates + */ +public class WeibullLogMOMEstimator implements DistributionEstimator<WeibullDistribution> { + /** + * The naive least-squares estimator. + */ + public static final WeibullLogMOMEstimator STATIC = new WeibullLogMOMEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private WeibullLogMOMEstimator() { + super(); + } + + @Override + public <A> WeibullDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + double beta1 = 0.0, beta3 = 0.0; + MeanVariance mvlogx = new MeanVariance(); + int size = adapter.size(data); + double size1 = size + 1.; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(data, i); + if (!(val > 0)) { + throw new ArithmeticException("Cannot least squares fit weibull to a data set which includes non-positive values: " + val); + } + final double yi = Math.log(-Math.log((size - i) / size1)); + final double logxi = Math.log(val); + beta1 += yi * logxi; + beta3 += yi; + mvlogx.put(logxi); + } + double k = (beta1 / size - beta3 / size * mvlogx.getMean()) / mvlogx.getSampleVariance(); + double lambda = 1. / Math.exp(beta3 / size - k * mvlogx.getMean()); + + return new WeibullDistribution(k, lambda); + } + + @Override + public Class<? super WeibullDistribution> getDistributionClass() { + return WeibullDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WeibullLogMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java new file mode 100644 index 00000000..dee3cbb3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java @@ -0,0 +1,472 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.ProbabilityWeightedMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.CauchyMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.EMGOlivierNorbergEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialMedianEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GeneralizedExtremeValueLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GeneralizedLogisticAlternateLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GumbelLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GumbelMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LMMDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LaplaceLMMEstimator; +import 
de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LaplaceMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogGammaLogMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogGammaLogMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogLogisticMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogMADDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogMOMDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalBilkovaLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalLogMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalLogMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogisticLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogisticMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.MADDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.MOMDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.NormalLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.NormalMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.NormalMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.RayleighLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.RayleighMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.SkewGNormalLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformEnhancedMinMaxEstimator; +import 
de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformMinMaxEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.WaldMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.WeibullLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.WeibullLogMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * A meta estimator that will try a number of (inexpensive) estimations, then + * choose whichever works best. + * + * @author Erich Schubert + * + * @apiviz.composedOf MOMDistributionEstimator + * @apiviz.composedOf MADDistributionEstimator + * @apiviz.composedOf LMMDistributionEstimator + * @apiviz.composedOf LogMOMDistributionEstimator + * @apiviz.composedOf LogMADDistributionEstimator + */ +public class BestFitEstimator implements DistributionEstimator<Distribution> { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(BestFitEstimator.class); + + /** + * Static instance. + */ + public static final BestFitEstimator STATIC = new BestFitEstimator(); + + /** + * Mean and variance based estimators. + */ + private Collection<MOMDistributionEstimator<?>> momests; + + /** + * Median average deviation from median estimators. + */ + private Collection<MADDistributionEstimator<?>> madests; + + /** + * L-Moment estimators. + */ + private Collection<LMMDistributionEstimator<?>> lmmests; + + /** + * Logspace Method of Moments estimators. 
+ */ + private Collection<LogMOMDistributionEstimator<?>> logmomests; + + /** + * Logspace Median average deviation from median estimators. + */ + private Collection<LogMADDistributionEstimator<?>> logmadests; + + /** + * Constructor. Use static instance instead! + */ + protected BestFitEstimator() { + super(); + momests = new ArrayList<>(5); + momests.add(NormalMOMEstimator.STATIC); + momests.add(GammaMOMEstimator.STATIC); + momests.add(WaldMOMEstimator.STATIC); + momests.add(ExponentialMOMEstimator.STATIC); + momests.add(EMGOlivierNorbergEstimator.STATIC); + madests = new ArrayList<>(11); + madests.add(NormalMADEstimator.STATIC); + madests.add(GammaMADEstimator.STATIC); + madests.add(ExponentialMADEstimator.STATIC); + madests.add(ExponentialMedianEstimator.STATIC); + madests.add(LaplaceMADEstimator.STATIC); + madests.add(GumbelMADEstimator.STATIC); + madests.add(CauchyMADEstimator.STATIC); + madests.add(LogisticMADEstimator.STATIC); + madests.add(LogLogisticMADEstimator.STATIC); + madests.add(RayleighMADEstimator.STATIC); + madests.add(UniformMADEstimator.STATIC); + lmmests = new ArrayList<>(14); + lmmests.add(NormalLMMEstimator.STATIC); + lmmests.add(GammaLMMEstimator.STATIC); + lmmests.add(ExponentialLMMEstimator.STATIC); + lmmests.add(LaplaceLMMEstimator.STATIC); + lmmests.add(GumbelLMMEstimator.STATIC); + lmmests.add(LogisticLMMEstimator.STATIC); + lmmests.add(GeneralizedLogisticAlternateLMMEstimator.STATIC); + lmmests.add(LogNormalLMMEstimator.STATIC); + lmmests.add(LogNormalBilkovaLMMEstimator.STATIC); + lmmests.add(SkewGNormalLMMEstimator.STATIC); + lmmests.add(GeneralizedExtremeValueLMMEstimator.STATIC); + lmmests.add(RayleighLMMEstimator.STATIC); + lmmests.add(WeibullLMMEstimator.STATIC); + lmmests.add(UniformLMMEstimator.STATIC); + logmomests = new ArrayList<>(2); + logmomests.add(LogNormalLogMOMEstimator.STATIC); + logmomests.add(LogGammaLogMOMEstimator.STATIC); + logmadests = new ArrayList<>(3); + logmadests.add(LogNormalLogMADEstimator.STATIC); + 
logmadests.add(LogGammaLogMADEstimator.STATIC); + logmadests.add(WeibullLogMADEstimator.STATIC); + } + + @Override + public <A> Distribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + int numlmm = 0; + for (LMMDistributionEstimator<?> est : lmmests) { + numlmm = Math.max(numlmm, est.getNumMoments()); + } + + final int len = adapter.size(data); + + // Build various statistics: + StatisticalMoments mom = new StatisticalMoments(), logmom = new StatisticalMoments(); + double[] x = new double[len], scratch = new double[len], logx = new double[len]; + + if (LOG.isDebuggingFine()) { + LOG.debugFine("Computing statistical moments and L-Moments."); + } + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + x[i] = val; + mom.put(val); + } + if (mom.getMax() <= mom.getMin()) { + LOG.warning("Constant distribution detected. Cannot fit."); + return new UniformDistribution(mom.getMin() - .1, mom.getMax() + .1); + } + // Sort: for L-Moments, but getting the median is now also cheap. + Arrays.sort(x); + double[] lmm; + try { + lmm = (numlmm > 0) ? ProbabilityWeightedMoments.samLMR(x, ArrayLikeUtil.DOUBLEARRAYADAPTER, numlmm) : null; + } catch (ArithmeticException e) { + lmm = null; + } + final double min = x[0], median = .5 * (x[len >> 1] + x[(len + 1) >> 1]), max = x[len - 1]; + if (LOG.isDebuggingFine()) { + LOG.debugFine("Computing statistical moments in logspace."); + } + // Build logspace copy: + double shift = Math.min(0., min - (max - min) * 1e-10); + for (int i = 0; i < len; i++) { + double val = x[i] - shift; + val = val > 0. ? 
Math.log(val) : Double.NEGATIVE_INFINITY; + logx[i] = val; + if (!Double.isInfinite(val) && !Double.isNaN(val)) { + logmom.put(val); + } + } + double logmedian = .5 * (logx[len >> 1] + logx[(len + 1) >> 1]); + if (LOG.isDebuggingFine()) { + LOG.debugFine("Computing MADs."); + } + double mad = computeMAD(x, median, scratch, len); + double logmad = computeMAD(logx, logmedian, scratch, len); + + Distribution best = null; + double bestscore = Double.POSITIVE_INFINITY; + DistributionEstimator<?> bestest = null; + + final int numest = momests.size() + madests.size() + lmmests.size() + logmomests.size() + logmadests.size() + 2; + FiniteProgress prog = LOG.isDebuggingFine() ? new FiniteProgress("Finding best matching distribution", numest, LOG) : null; + for (MOMDistributionEstimator<?> est : momests) { + try { + Distribution d = est.estimateFromStatisticalMoments(mom); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (MADDistributionEstimator<?> est : madests) { + try { + Distribution d = est.estimateFromMedianMAD(median, mad); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (LMMDistributionEstimator<?> 
est : lmmests) { + if (lmm != null) { + try { + Distribution d = est.estimateFromLMoments(lmm); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (LogMOMDistributionEstimator<?> est : logmomests) { + try { + Distribution d = est.estimateFromLogStatisticalMoments(logmom, shift); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (LogMADDistributionEstimator<?> est : logmadests) { + try { + Distribution d = est.estimateFromLogMedianMAD(logmedian, logmad, shift); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + { // Uniform estimators. 
+ final UniformMinMaxEstimator est = UniformMinMaxEstimator.STATIC; + try { + Distribution d = est.estimate(min, max); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + { // Uniform estimators. + final UniformEnhancedMinMaxEstimator est = UniformEnhancedMinMaxEstimator.STATIC; + try { + Distribution d = est.estimate(min, max, len); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + if (prog != null) { + prog.ensureCompleted(LOG); + } + + if (LOG.isVeryVerbose()) { + LOG.veryverbose("Best distribution fit: " + bestscore + " " + best.toString() + " via " + bestest); + } + + return best; + } + + public double computeMAD(double[] data, double median, double[] scratch, final int len) { + // Compute LogMAD: + for (int i = 0; i < len; i++) { + scratch[i] = Math.abs(data[i] - median); + } + double logmad = QuickSelect.median(scratch); + // Adjust LogMAD if 0: + if (!(logmad > 0.)) { + double xmin = Double.POSITIVE_INFINITY; + for (int i = (len >> 1); i < len; i++) { + if (scratch[i] > 0. 
&& scratch[i] < xmin) { + xmin = scratch[i]; + } + } + if (!Double.isInfinite(xmin)) { + logmad = xmin; + } + } + return logmad; + } + + /** + * Test the quality of a fit. + * + * @param x Input data + * @param test Scratch space for testing (will be overwritten!) + * @param dist Distribution + * @return K-S-Test score + * @throws ArithmeticException + */ + private double testFit(double[] x, double[] test, Distribution dist) throws ArithmeticException { + for (int i = 0; i < test.length; i++) { + test[i] = dist.cdf(x[i]); + if (test[i] > 1.) { + test[i] = 1.; + } + if (test[i] < 0.) { + test[i] = 0.; + } + if (Double.isNaN(test[i])) { + throw new ArithmeticException("Got NaN after fitting " + dist.toString()); + } + } + // Should actually be sorted already... + Arrays.sort(test); + return KolmogorovSmirnovTest.simpleTest(test); + } + + @Override + public Class<? super Distribution> getDistributionClass() { + return Distribution.class; // No guarantees, sorry. + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected BestFitEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java new file mode 100644 index 00000000..5c1cf448 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java @@ -0,0 +1,156 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Trimmed wrapper around other estimators. Sorts the data, trims it, then + * analyzes it using another estimator. + * + * @author Erich Schubert + * + * @param <D> Distribution type + */ +public class TrimmedEstimator<D extends Distribution> implements DistributionEstimator<D> { + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to trim. + */ + private double trim; + + /** + * Constructor. + * + * @param inner Inner estimator. + * @param trim Trimming parameter. + */ + public TrimmedEstimator(DistributionEstimator<D> inner, double trim) { + super(); + this.inner = inner; + this.trim = trim; + } + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + int len = adapter.size(data); + final int cut = ((int) (len * trim)) >> 1; + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + x[i] = val; + } + // Sort our copy. 
+ Arrays.sort(x); + { // Trim: + // TODO: is it more efficient to just copy, or instead use a trimmed array + // adapter? + double[] trimmed = new double[len - 2 * cut]; + System.arraycopy(x, cut, trimmed, 0, trimmed.length); + x = trimmed; + len = trimmed.length; + } + return inner.estimate(x, ArrayLikeUtil.DOUBLEARRAYADAPTER); + } + + @Override + public Class<? super D> getDistributionClass() { + return inner.getDistributionClass(); + } + + @Override + public String toString() { + return this.getClass().getSimpleName() + "(" + inner.toString() + ", trim=" + trim + ")"; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <D> Distribution type + */ + public static class Parameterizer<D extends Distribution> extends AbstractParameterizer { + /** + * Option for the class to use on the trimmed sample. + */ + public static final OptionID INNER_ID = new OptionID("trimmedestimate.inner", "Estimator to use on the trimmed data."); + + /** + * Option for specifying the amount of data to trim. + */ + public static final OptionID TRIM_ID = new OptionID("trimmedestimate.trim", "Relative amount of data to trim on each end, must be 0 < trim < 0.5"); + + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to trim. 
+ */ + private double trim; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<DistributionEstimator<D>> innerP = new ObjectParameter<>(INNER_ID, DistributionEstimator.class); + if (config.grab(innerP)) { + inner = innerP.instantiateClass(config); + } + + DoubleParameter trimP = new DoubleParameter(TRIM_ID); + trimP.addConstraint(new GreaterConstraint(0.)); + trimP.addConstraint(new LessConstraint(0.5)); + if (config.grab(trimP)) { + trim = trimP.doubleValue(); + } + } + + @Override + protected TrimmedEstimator<D> makeInstance() { + return new TrimmedEstimator<>(inner, trim); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java new file mode 100644 index 00000000..0ef6318d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java @@ -0,0 +1,166 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Winsorising or Georgization estimator. Similar to trimming, this is expected + * to be more robust to outliers. However, instead of removing the extreme + * values, they are instead replaced with the cutoff value. This keeps the + * quantity of the data the same, and will have a lower impact on variance and + * similar measures. + * + * Reference: + * <p> + * C. Hastings, F. Mosteller, J. W. Tukey, C. P. Winsor<br /> + * Low moments for small samples: a comparative study of order statistics.<br /> + * The Annals of Mathematical Statistics, 18(3) * + * </p> + * + * @author Erich Schubert + * + * @param <D> Distribution type + */ +@Reference(authors = "C. Hastings, F. Mosteller, J. W. Tukey, C. P. 
Winsor", title = "Low moments for small samples: a comparative study of order statistics", booktitle = "The Annals of Mathematical Statistics, 18(3)", url = "http://dx.doi.org/10.1214/aoms/1177730388") +public class WinsorisingEstimator<D extends Distribution> implements DistributionEstimator<D> { + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to winsorize. + */ + private double winsorize; + + /** + * Constructor. + * + * @param inner Inner estimator. + * @param winsorize Winsorize parameter. + */ + public WinsorisingEstimator(DistributionEstimator<D> inner, double winsorize) { + super(); + this.inner = inner; + this.winsorize = winsorize; + } + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + int len = adapter.size(data); + final int cut = ((int) (len * winsorize)) >> 1; + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + x[i] = val; + } + // Partially sort our copy. + double min = QuickSelect.quickSelect(x, 0, len, cut); + double max = QuickSelect.quickSelect(x, cut, len, len - 1 - cut); + // Winsorize by replacing the smallest and largest values. + // QuickSelect ensured that these are correctly in place. + for (int i = 0, j = len - 1; i < cut; i++, j--) { + x[i] = min; + x[j] = max; + } + return inner.estimate(x, ArrayLikeUtil.DOUBLEARRAYADAPTER); + } + + @Override + public Class<? super D> getDistributionClass() { + return inner.getDistributionClass(); + } + + @Override + public String toString() { + return this.getClass().getSimpleName() + "(" + inner.toString() + ", trim=" + winsorize + ")"; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <D> Distribution type + */ + public static class Parameterizer<D extends Distribution> extends AbstractParameterizer { + /** + * Option for the class to use on the winsorized sample. + */ + public static final OptionID INNER_ID = new OptionID("winsorize.inner", "Estimator to use on the winsorized data."); + + /** + * Option for specifying the amount of data to winsorize. + */ + public static final OptionID WINSORIZE_ID = new OptionID("winsorize.winsorize", "Relative amount of data to winsorize on each end, must be 0 < winsorize < 0.5"); + + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to winsorize. + */ + private double winsorize; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<DistributionEstimator<D>> innerP = new ObjectParameter<>(INNER_ID, DistributionEstimator.class); + if (config.grab(innerP)) { + inner = innerP.instantiateClass(config); + } + + DoubleParameter trimP = new DoubleParameter(WINSORIZE_ID); + trimP.addConstraint(new GreaterConstraint(0.)); + trimP.addConstraint(new LessConstraint(0.5)); + if (config.grab(trimP)) { + winsorize = trimP.doubleValue(); + } + } + + @Override + protected WinsorisingEstimator<D> makeInstance() { + return new WinsorisingEstimator<>(inner, winsorize); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java new file mode 100644 index 00000000..c4b75f2d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java @@ -0,0 +1,6 @@ +/** + * Meta estimators: estimators that do not actually estimate themselves, but instead use other estimators, e.g. on a trimmed data set, or as an ensemble. 
+ * + * @author Erich Schubert + */ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java new file mode 100644 index 00000000..62c98262 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java @@ -0,0 +1,29 @@ +/** + * Estimators for statistical distributions. + * + * @author Erich Schubert + */ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ |