diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/scaling')
25 files changed, 968 insertions, 289 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java index aad0b63a..068e1230 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java index b23b9400..daff7275 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java index a2202b92..7f24431d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java index 87f42835..ef28c434 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java index b5007ad1..8e497dd9 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java index 6bd0527f..c738aa9a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java index 228f5f1a..27d12db4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java new file mode 100644 index 00000000..202d1044 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java @@ -0,0 +1,172 @@ +package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta.BestFitEstimator; +import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * CDF based outlier score scaling. + * + * Enhanced version of the scaling proposed in: + * <p> + * H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek<br /> + * Interpreting and Unifying Outlier Scores<br /> + * Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011 + * </p> + * + * See also: + * <p> + * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek<br /> + * Outlier Detection in Arbitrarily Oriented Subspaces<br /> + * in: Proc. IEEE International Conference on Data Mining (ICDM 2012) + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Interpreting and Unifying Outlier Scores", booktitle = "Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url = "http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") +public class COPOutlierScaling implements OutlierScalingFunction { + /** + * Phi parameter. + */ + private double phi = 0.; + + /** + * Score distribution. + */ + private Distribution dist; + + /** + * Inversion flag. + */ + private boolean inverted = false; + + /** + * Constructor. + * + * @param phi Phi parameter + */ + public COPOutlierScaling(double phi) { + super(); + this.phi = phi; + } + + /** + * Secondary reference. + */ + @Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)") + public static final void secondReference() { + // Dummy, reference attachment point only. + } + + @Override + public double getScaled(double value) { + if (dist == null) { + throw new AbortException("Programming error: outlier scaling not initialized."); + } + double s = inverted ? (1 - dist.cdf(value)) : dist.cdf(value); + return (phi > 0.) ? (phi * s) / (1 - s + phi) : s; + } + + @Override + public double getMin() { + return 0.; + } + + @Override + public double getMax() { + return 1.; + } + + @Override + public void prepare(OutlierResult or) { + double[] s; + { + Relation<Double> scores = or.getScores(); + s = new double[scores.size()]; + int i = 0; + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance(), i++) { + s[i] = scores.get(id); + } + } + Arrays.sort(s); + dist = BestFitEstimator.STATIC.estimate(s, ArrayLikeUtil.DOUBLEARRAYADAPTER); + inverted = (or.getOutlierMeta() instanceof InvertedOutlierScoreMeta); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + double[] s = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter); + Arrays.sort(s); + dist = BestFitEstimator.STATIC.estimate(s, ArrayLikeUtil.DOUBLEARRAYADAPTER); + inverted = false; // Not supported + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Phi parameter. + */ + public static final OptionID PHI_ID = new OptionID("copscaling.phi", "Phi parameter, expected rate of outliers. Set to 0 to use raw CDF values."); + + /** + * Phi value. + */ + private double phi = 0.; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter phiP = new DoubleParameter(PHI_ID); + if (config.grab(phiP)) { + phi = phiP.doubleValue(); + } + } + + @Override + protected COPOutlierScaling makeInstance() { + return new COPOutlierScaling(phi); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java index 4bf86f06..5e55c2e0 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,6 +28,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -63,9 +64,29 @@ public class HeDESNormalizationOutlierScaling implements OutlierScalingFunction DoubleMinMax minmax = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + minmax.put(val); + } + } + + mean = mv.getMean(); + stddev = mv.getSampleStddev(); + scaledmax = getScaled(minmax.getMax()); + scaledmin = getScaled(minmax.getMin()); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + MeanVariance mv = new MeanVariance(); + DoubleMinMax minmax = new DoubleMinMax(); + + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); minmax.put(val); } @@ -90,6 +111,10 @@ public class HeDESNormalizationOutlierScaling implements OutlierScalingFunction @Override public double getScaled(double value) { assert (stddev > 0 || (value == mean)) : "prepare() was not run prior to using the scaling function."; - return (value - mean) / stddev; + if (stddev > 0.) { + return (value - mean) / stddev; + } else { + return 0.; + } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java index f7cf0df0..effbcfe0 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java index e34355a1..d30fbac7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java index e026b211..9c5ad920 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -45,7 +46,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio * The logger for this class. */ private static final Logging LOG = Logging.getLogger(MixtureModelOutlierScalingFunction.class); - + /** * Parameter mu of the gaussian distribution (outliers) */ @@ -122,14 +123,14 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio // Initial parameters - are these defaults sounds? MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); } } double curMu = mv.getMean() * 2.; - if(curMu == 0) { + if (curMu == 0) { curMu = Double.MIN_NORMAL; } double curSigma = Math.max(mv.getSampleStddev(), Double.MIN_NORMAL); @@ -141,7 +142,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio int iter = 0; // logger.debugFine("iter #-1 mu = " + curMu + " sigma = " + curSigma + // " lambda = " + curLambda + " alpha = " + curAlpha); - while(true) { + while (true) { // E and M-Steps // Sum of weights double tisum = 0.0; @@ -149,7 +150,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio double wsum = 0.0; // Weighted deviation from previous mean double sqsum = 0.0; - for(int i = 0; i < ids.size(); i++) { + for (int i = 0; i < ids.size(); i++) { double val = or.getScores().get(ids.get(i)); // E-Step double ti = calcPosterior(val, curAlpha, curMu, curSigma, curLambda); @@ -158,7 +159,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio wsum += ti * val; sqsum += ti * val * val; // (val - curMu) * (val - curMu); } - if(tisum <= 0.0 || wsum <= 0.0) { + if (tisum <= 0.0 || wsum <= 0.0) { LOG.warning("MixtureModel Outlier Scaling converged to extreme."); break; } @@ -169,23 +170,115 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio // converged? { boolean changed = false; - if(Math.abs(newMu - curMu) > DELTA) { + if (Math.abs(newMu - curMu) > DELTA) { + changed = true; + } + if (Math.abs(newSigma - curSigma) > DELTA) { + changed = true; + } + if (Math.abs(newLambda - curLambda) > DELTA) { + changed = true; + } + if (Math.abs(newAlpha - curAlpha) > DELTA) { + changed = true; + } + if (!changed) { + break; + } + } + if (newSigma <= 0.0 || newAlpha <= 0.0) { + LOG.warning("MixtureModel Outlier Scaling converged to extreme."); + break; + } + // logger.debugFine("iter #"+iter+" mu = " + newMu + " sigma = " + + // newSigma + " lambda = " + newLambda + " alpha = " + newAlpha); + curMu = newMu; + curSigma = newSigma; + curLambda = newLambda; + curAlpha = newAlpha; + + iter++; + if (iter > 100) { + LOG.warning("Max iterations met in mixture model fitting."); + break; + } + } + mu = curMu; + sigma = curSigma; + lambda = curLambda; + alpha = curAlpha; + // logger.debugFine("mu = " + mu + " sigma = " + sigma + " lambda = " + + // lambda + " alpha = " + alpha); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + // Initial parameters - are these defaults sounds? + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + } + } + double curMu = mv.getMean() * 2.; + if (curMu == 0) { + curMu = Double.MIN_NORMAL; + } + double curSigma = Math.max(mv.getSampleStddev(), Double.MIN_NORMAL); + double curLambda = Math.min(1.0 / curMu, Double.MAX_VALUE); + double curAlpha = 0.05; + + // TODO: stop condition! + int iter = 0; + // logger.debugFine("iter #-1 mu = " + curMu + " sigma = " + curSigma + + // " lambda = " + curLambda + " alpha = " + curAlpha); + while (true) { + // E and M-Steps + // Sum of weights + double tisum = 0.0; + // Weighted sum + double wsum = 0.0; + // Weighted deviation from previous mean + double sqsum = 0.0; + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + // E-Step + double ti = calcPosterior(val, curAlpha, curMu, curSigma, curLambda); + // M-Step + tisum += ti; + wsum += ti * val; + sqsum += ti * val * val; // (val - curMu) * (val - curMu); + } + if (tisum <= 0.0 || wsum <= 0.0) { + LOG.warning("MixtureModel Outlier Scaling converged to extreme."); + break; + } + double newMu = wsum / tisum; + double newSigma = Math.max(Math.sqrt(sqsum / tisum - newMu * newMu), Double.MIN_NORMAL); + double newLambda = Math.min(tisum / wsum, Double.MAX_VALUE); + double newAlpha = tisum / size; + // converged? + { + boolean changed = false; + if (Math.abs(newMu - curMu) > DELTA) { changed = true; } - if(Math.abs(newSigma - curSigma) > DELTA) { + if (Math.abs(newSigma - curSigma) > DELTA) { changed = true; } - if(Math.abs(newLambda - curLambda) > DELTA) { + if (Math.abs(newLambda - curLambda) > DELTA) { changed = true; } - if(Math.abs(newAlpha - curAlpha) > DELTA) { + if (Math.abs(newAlpha - curAlpha) > DELTA) { changed = true; } - if(!changed) { + if (!changed) { break; } } - if(newSigma <= 0.0 || newAlpha <= 0.0) { + if (newSigma <= 0.0 || newAlpha <= 0.0) { LOG.warning("MixtureModel Outlier Scaling converged to extreme."); break; } @@ -197,7 +290,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio curAlpha = newAlpha; iter++; - if(iter > 100) { + if (iter > 100) { LOG.warning("Max iterations met in mixture model fitting."); break; } @@ -224,9 +317,9 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio public double getScaled(double value) { final double val = 1.0 - calcPosterior(value, alpha, mu, sigma, lambda); // Work around issues with unstable convergence. - if(Double.isNaN(val)) { + if (Double.isNaN(val)) { return 0.0; } return val; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java index 298a5853..d5cd3f40 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,6 +26,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -68,16 +69,6 @@ public class MultiplicativeInverseScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - scaleval = getScaleValue(or); - } - - /** - * Compute the scaling value in a linear scan over the annotation. - * - * @param or Outlier result - * @return Scaling value. - */ - private static double getScaleValue(OutlierResult or) { double max = Double.MIN_VALUE; Relation<Double> scores = or.getScores(); for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { @@ -87,9 +78,22 @@ public class MultiplicativeInverseScaling implements OutlierScalingFunction { max = Math.max(max, inv); } } - return max; + scaleval = max; } - + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + double max = Double.MIN_VALUE; + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double inv = Math.abs(1.0 / adapter.getDouble(array, i)); + if(!Double.isInfinite(inv) && !Double.isNaN(inv)) { + max = Math.max(max, inv); + } + } + scaleval = max; + } + @Override public double getMin() { return 0.0; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java index 07e58679..7da0c933 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,6 +29,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -91,7 +92,7 @@ public class OutlierGammaScaling implements OutlierScalingFunction { public double getScaled(double value) { assert (theta > 0) : "prepare() was not run prior to using the scaling function."; value = preScale(value); - if(Double.isNaN(value) || Double.isInfinite(value)) { + if (Double.isNaN(value) || Double.isInfinite(value)) { return 1.0; } return Math.max(0, (GammaDistribution.regularizedGammaP(k, value / theta) - atmean) / (1 - atmean)); @@ -102,10 +103,29 @@ public class OutlierGammaScaling implements OutlierScalingFunction { meta = or.getOutlierMeta(); MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double score = scores.get(id); score = preScale(score); - if(!Double.isNaN(score) && !Double.isInfinite(score)) { + if (!Double.isNaN(score) && !Double.isInfinite(score)) { + mv.put(score); + } + } + final double mean = mv.getMean(); + final double var = mv.getSampleVariance(); + k = (mean * mean) / var; + theta = var / mean; + atmean = GammaDistribution.regularizedGammaP(k, mean / theta); + // logger.warning("Mean:"+mean+" Var:"+var+" Theta: "+theta+" k: "+k+" valatmean"+atmean); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double score = adapter.getDouble(array, i); + score = preScale(score); + if (!Double.isNaN(score) && !Double.isInfinite(score)) { mv.put(score); } } @@ -126,7 +146,7 @@ public class OutlierGammaScaling implements OutlierScalingFunction { * @return Normalized score. */ protected double preScale(double score) { - if(normalize) { + if (normalize) { score = meta.normalizeScore(score); } return score; @@ -156,7 +176,7 @@ public class OutlierGammaScaling implements OutlierScalingFunction { protected void makeOptions(Parameterization config) { super.makeOptions(config); Flag normalizeF = new Flag(NORMALIZE_ID); - if(config.grab(normalizeF)) { + if (config.grab(normalizeF)) { normalize = normalizeF.getValue(); } } @@ -166,4 +186,4 @@ public class OutlierGammaScaling implements OutlierScalingFunction { return new OutlierGammaScaling(normalize); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java index 8f008176..dfae1068 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GlobalParameterConstraint; @@ -136,7 +137,7 @@ public class OutlierLinearScaling implements OutlierScalingFunction { @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= min) { + if (value <= min) { return 0; } return Math.min(1, ((value - min) / factor)); @@ -144,53 +145,107 @@ public class OutlierLinearScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(usemean) { + if (usemean) { MeanVariance mv = new MeanVariance(); DoubleMinMax mm = (max == null) ? new DoubleMinMax() : null; boolean skippedzeros = false; Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(nozeros && val == 0.0) { + if (nozeros && val == 0.0) { skippedzeros = true; continue; } - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); } - if(max == null) { + if (max == null) { mm.put(val); } } - if(skippedzeros && mm.getMin() == mm.getMax()) { + if (skippedzeros && mm.getMin() == mm.getMax()) { mm.put(0.0); mv.put(0.0); } min = mv.getMean(); - if(max == null) { + if (max == null) { max = mm.getMax(); } - } - else { - if(min == null || max == null) { + } else { + if (min == null || max == null) { boolean skippedzeros = false; DoubleMinMax mm = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(nozeros && val == 0.0) { + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + mm.put(val); + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + } + if (min == null) { + min = mm.getMin(); + } + if (max == null) { + max = mm.getMax(); + } + } + } + factor = (max - min); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (usemean) { + MeanVariance mv = new MeanVariance(); + DoubleMinMax mm = (max == null) ? new DoubleMinMax() : null; + boolean skippedzeros = false; + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + } + if (max == null) { + mm.put(val); + } + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + mv.put(0.0); + } + min = mv.getMean(); + if (max == null) { + max = mm.getMax(); + } + } else { + if (min == null || max == null) { + boolean skippedzeros = false; + DoubleMinMax mm = new DoubleMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (nozeros && val == 0.0) { skippedzeros = true; continue; } mm.put(val); } - if(skippedzeros && mm.getMin() == mm.getMax()) { + if (skippedzeros && mm.getMin() == mm.getMax()) { mm.put(0.0); } - if(min == null) { + if (min == null) { min = mm.getMin(); } - if(max == null) { + if (max == null) { max = mm.getMax(); } } @@ -241,28 +296,28 @@ public class OutlierLinearScaling implements OutlierScalingFunction { super.makeOptions(config); DoubleParameter minP = new DoubleParameter(MIN_ID); minP.setOptional(true); - if(config.grab(minP)) { + if (config.grab(minP)) { min = minP.getValue(); } DoubleParameter maxP = new DoubleParameter(MAX_ID); maxP.setOptional(true); - if(config.grab(maxP)) { + if (config.grab(maxP)) { max = maxP.getValue(); } Flag meanF = new Flag(MEAN_ID); - if(config.grab(meanF)) { + if (config.grab(meanF)) { usemean = meanF.getValue(); } Flag nozerosF = new Flag(NOZEROS_ID); - if(config.grab(nozerosF)) { + if (config.grab(nozerosF)) { nozeros = nozerosF.getValue(); } // Use-Mean and Minimum value must not be set at the same time! - ArrayList<Parameter<?>> minmean = new ArrayList<Parameter<?>>(); + ArrayList<Parameter<?>> minmean = new ArrayList<>(); minmean.add(minP); minmean.add(meanF); GlobalParameterConstraint gpc = new OnlyOneIsAllowedToBeSetGlobalConstraint(minmean); @@ -274,4 +329,4 @@ public class OutlierLinearScaling implements OutlierScalingFunction { return new OutlierLinearScaling(min, max, usemean, nozeros); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java index 45c6928b..f0e04913 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,6 +27,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -79,13 +80,27 @@ public class OutlierMinusLogScaling implements OutlierScalingFunction { public void prepare(OutlierResult or) { DoubleMinMax mm = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mm.put(val); } } max = mm.getMax(); mlogmax = -Math.log(mm.getMin() / max); } -}
\ No newline at end of file + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + DoubleMinMax mm = new DoubleMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mm.put(val); + } + } + max = mm.getMax(); + mlogmax = -Math.log(mm.getMin() / max); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java index ac774745..9de05e78 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; */ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction; /** @@ -37,9 +38,22 @@ public interface OutlierScalingFunction extends ScalingFunction { /** * Prepare is called once for each data set, before getScaled() will be * called. This function can be used to extract global parameters such as - * means, minimums or maximums from the Database, Result or Annotation. + * means, minimums or maximums from the outlier scores. * * @param or Outlier result to use */ public void prepare(OutlierResult or); + + /** + * Prepare is called once for each data set, before getScaled() will be + * called. This function can be used to extract global parameters such as + * means, minimums or maximums from the score array. + * + * The method using a full {@link OutlierResult} is preferred, as it will + * allow access to the metadata. + * + * @param array Data to process + * @param adapter Array adapter + */ + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter); }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java index 41a4d721..302602d4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,6 +27,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -43,30 +44,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; */ public class OutlierSqrtScaling implements OutlierScalingFunction { /** - * Parameter to specify the fixed minimum to use. - * <p> - * Key: {@code -sqrtscale.min} - * </p> + * Minimum and maximum values. */ - public static final OptionID MIN_ID = new OptionID("sqrtscale.min", "Fixed minimum to use in sqrt scaling."); + protected double min, max; /** - * Parameter to specify the fixed maximum to use. - * <p> - * Key: {@code -sqrtscale.max} - * </p> + * Predefined minimum and maximum values. */ - public static final OptionID MAX_ID = new OptionID("sqrtscale.max", "Fixed maximum to use in sqrt scaling."); - - /** - * Field storing the minimum value - */ - protected Double min = null; - - /** - * Field storing the Maximum value - */ - protected Double max = null; + protected Double pmin = null, pmax = null; /** * Scaling factor @@ -76,19 +61,19 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { /** * Constructor. * - * @param min - * @param max + * @param pmin Predefined minimum + * @param pmax Predefined maximum */ - public OutlierSqrtScaling(Double min, Double max) { + public OutlierSqrtScaling(Double pmin, Double pmax) { super(); - this.min = min; - this.max = max; + this.pmin = pmin; + this.pmax = pmax; } @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= min) { + if (value <= min) { return 0; } return Math.min(1, (Math.sqrt(value - min) / factor)); @@ -96,21 +81,34 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(min == null || max == null) { + if (pmin == null || pmax == null) { DoubleMinMax mm = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isInfinite(val)) { mm.put(val); } } - if(min == null) { - min = mm.getMin(); - } - if(max == null) { - max = mm.getMax(); + min = (pmin == null) ? mm.getMin() : pmin; + max = (pmax == null) ? mm.getMax() : pmax; + } + factor = Math.sqrt(max - min); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (pmin == null || pmax == null) { + DoubleMinMax mm = new DoubleMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { + mm.put(val); + } } + min = (pmin == null) ? mm.getMin() : pmin; + max = (pmax == null) ? mm.getMax() : pmax; } factor = Math.sqrt(max - min); } @@ -133,8 +131,30 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Parameter to specify the fixed minimum to use. + * <p> + * Key: {@code -sqrtscale.min} + * </p> + */ + public static final OptionID MIN_ID = new OptionID("sqrtscale.min", "Fixed minimum to use in sqrt scaling."); + + /** + * Parameter to specify the fixed maximum to use. + * <p> + * Key: {@code -sqrtscale.max} + * </p> + */ + public static final OptionID MAX_ID = new OptionID("sqrtscale.max", "Fixed maximum to use in sqrt scaling."); + + /** + * Predefined minimum value. + */ protected double min; + /** + * Predefined maximum value. + */ protected double max; @Override @@ -142,12 +162,12 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { super.makeOptions(config); DoubleParameter minP = new DoubleParameter(MIN_ID); minP.setOptional(true); - if(config.grab(minP)) { + if (config.grab(minP)) { min = minP.getValue(); } DoubleParameter maxP = new DoubleParameter(MAX_ID); maxP.setOptional(true); - if(config.grab(maxP)) { + if (config.grab(maxP)) { max = maxP.getValue(); } } @@ -157,4 +177,4 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { return new OutlierSqrtScaling(min, max); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java index 91f66587..505f2002 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,6 +29,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; /** @@ -69,6 +71,12 @@ public class RankingPseudoOutlierScaling implements OutlierScalingFunction { // TODO: Inverted scores! Arrays.sort(scores); } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + scores = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter); + Arrays.sort(scores); + } @Override public double getMax() { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java index 16115e92..34911169 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,12 +26,14 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import java.util.BitSet; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -46,7 +48,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { * The logger for this class. */ private static final Logging LOG = Logging.getLogger(SigmoidOutlierScalingFunction.class); - + /** * Sigmoid parameter */ @@ -62,30 +64,31 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { // Initial parameters - are these defaults sounds? MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); mv.put(val); } double a = 1.0; - double b = - mv.getMean(); + double b = -mv.getMean(); int iter = 0; ArrayDBIDs ids = DBIDUtil.ensureArray(or.getScores().getDBIDs()); + DBIDArrayIter it = ids.iter(); BitSet t = new BitSet(ids.size()); boolean changing = true; - while(changing) { + while (changing) { changing = false; // E-Step - for(int i = 0; i < ids.size(); i++) { - double val = or.getScores().get(ids.get(i)); + it.seek(0); + for (int i = 0; i < ids.size(); i++, it.advance()) { + double val = or.getScores().get(it); double targ = a * val + b; - if(targ > 0) { - if (!t.get(i)) { + if (targ > 0) { + if (!t.get(i)) { t.set(i); changing = true; } - } - else { + } else { if (t.get(i)) { t.clear(i); changing = true; @@ -95,7 +98,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { if (!changing) { break; } - //logger.debugFine("Number of outliers in sigmoid: " + t.cardinality()); + // logger.debugFine("Number of outliers in sigmoid: " + t.cardinality()); // M-Step // Implementation based on:<br /> // H.-T. Lin, C.-J. Lin, R. C. Weng:<br /> @@ -107,14 +110,74 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { } iter++; - if(iter > 100) { + if (iter > 100) { + LOG.warning("Max iterations met in sigmoid fitting."); + break; + } + } + Afinal = a; + Bfinal = b; + LOG.debugFine("A = " + Afinal + " B = " + Bfinal); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + // Initial parameters - are these defaults sounds? + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { + mv.put(val); + } + } + double a = 1.0; + double b = -mv.getMean(); + int iter = 0; + + BitSet t = new BitSet(size); + boolean changing = true; + while (changing) { + changing = false; + // E-Step + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + double targ = a * val + b; + if (targ > 0) { + if (!t.get(i)) { + t.set(i); + changing = true; + } + } else { + if (t.get(i)) { + t.clear(i); + changing = true; + } + } + } + if (!changing) { + break; + } + // logger.debugFine("Number of outliers in sigmoid: " + t.cardinality()); + // M-Step + // Implementation based on:<br /> + // H.-T. Lin, C.-J. Lin, R. C. Weng:<br /> + // A Note on Platt’s Probabilistic Outputs for Support Vector Machines + { + double[] newab = MStepLevenbergMarquardt(a, b, t, array, adapter); + a = newab[0]; + b = newab[1]; + } + + iter++; + if (iter > 100) { LOG.warning("Max iterations met in sigmoid fitting."); break; } } Afinal = a; Bfinal = b; - LOG.debugFine("A = "+Afinal+" B = "+Bfinal); + LOG.debugFine("A = " + Afinal + " B = " + Bfinal); } /** @@ -136,6 +199,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { private final double[] MStepLevenbergMarquardt(double a, double b, ArrayDBIDs ids, BitSet t, Relation<Double> scores) { final int prior1 = t.cardinality(); final int prior0 = ids.size() - prior1; + DBIDArrayIter iter = ids.iter(); final int maxiter = 10; final double minstep = 1e-8; @@ -147,38 +211,38 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { // t[i] := t.get(i) ? hiTarget : loTarget. // Reset, or continue with previous values? - //a = 0.0; - //b = Math.log((prior0 + 1.0) / (prior1 + 1.0)); + // a = 0.0; + // b = Math.log((prior0 + 1.0) / (prior1 + 1.0)); double fval = 0.0; - for(int i = 0; i < ids.size(); i++) { - final double val = scores.get(ids.get(i)); + iter.seek(0); + for (int i = 0; i < ids.size(); i++, iter.advance()) { + final double val = scores.get(iter); final double fApB = val * a + b; final double ti = t.get(i) ? hiTarget : loTarget; - if(fApB >= 0) { + if (fApB >= 0) { fval += ti * fApB + Math.log(1 + Math.exp(-fApB)); - } - else { + } else { fval += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); } } - for(int it = 0; it < maxiter; it++) { - //logger.debugFinest("Iter: " + it + "a: " + a + " b: " + b); + for (int it = 0; it < maxiter; it++) { + // logger.debugFinest("Iter: " + it + "a: " + a + " b: " + b); // Update Gradient and Hessian (use H’ = H + sigma I) double h11 = sigma; double h22 = sigma; double h21 = 0.0; double g1 = 0.0; double g2 = 0.0; - for(int i = 0; i < ids.size(); i++) { - final double val = scores.get(ids.get(i)); + iter.seek(0); + for (int i = 0; i < ids.size(); i++, iter.advance()) { + final double val = scores.get(iter); final double fApB = val * a + b; final double p; final double q; - if(fApB >= 0) { + if (fApB >= 0) { p = Math.exp(-fApB) / (1.0 + Math.exp(-fApB)); q = 1.0 / (1.0 + Math.exp(-fApB)); - } - else { + } else { p = 1.0 / (1.0 + Math.exp(fApB)); q = Math.exp(fApB) / (1.0 + Math.exp(fApB)); } @@ -191,7 +255,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { g2 += d1; } // Stop condition - if(Math.abs(g1) < 1e-5 && Math.abs(g2) < 1e-5) { + if (Math.abs(g1) < 1e-5 && Math.abs(g2) < 1e-5) { break; } // Compute modified Newton directions @@ -200,36 +264,152 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { final double dB = -(-h21 * g1 + h11 * g2) / det; final double gd = g1 * dA + g2 * dB; double stepsize = 1.0; - while(stepsize >= minstep) { // Line search + while (stepsize >= minstep) { // Line search final double newA = a + stepsize * dA; final double newB = b + stepsize * dB; double newf = 0.0; - for(int i = 0; i < ids.size(); i++) { - final double val = scores.get(ids.get(i)); + iter.seek(0); + for (int i = 0; i < ids.size(); i++, iter.advance()) { + final double val = scores.get(iter); final double fApB = val * newA + newB; final double ti = t.get(i) ? hiTarget : loTarget; - if(fApB >= 0) { + if (fApB >= 0) { newf += ti * fApB + Math.log(1 + Math.exp(-fApB)); - } - else { + } else { newf += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); } } - if(newf < fval + 0.0001 * stepsize * gd) { + if (newf < fval + 0.0001 * stepsize * gd) { a = newA; b = newB; fval = newf; break; // Sufficient decrease satisfied + } else { + stepsize /= 2.0; + } + if (stepsize < minstep) { + LOG.debug("Minstep hit."); + break; + } + } + if (it + 1 >= maxiter) { + LOG.debug("Maximum iterations hit."); + break; + } + } + return new double[] { a, b }; + } + + /** + * M-Step using a modified Levenberg-Marquardt method. + * + * <p> + * Implementation based on:<br /> + * H.-T. Lin, C.-J. Lin, R. C. Weng:<br /> + * A Note on Platt’s Probabilistic Outputs for Support Vector Machines + * </p> + * + * @param a A parameter + * @param b B parameter + * @param t Bitset containing the assignment + * @param array Score array + * @param adapter Array adapter + * @return new values for A and B. + */ + private final <A> double[] MStepLevenbergMarquardt(double a, double b, BitSet t, A array, NumberArrayAdapter<?, A> adapter) { + final int size = adapter.size(array); + final int prior1 = t.cardinality(); + final int prior0 = size - prior1; + + final int maxiter = 10; + final double minstep = 1e-8; + final double sigma = 1e-12; + // target value for "set" objects + final double loTarget = (prior1 + 1.0) / (prior1 + 2.0); + // target value for "unset" objects + final double hiTarget = 1.0 / (prior0 + 2.0); + // t[i] := t.get(i) ? hiTarget : loTarget. + + // Reset, or continue with previous values? + // a = 0.0; + // b = Math.log((prior0 + 1.0) / (prior1 + 1.0)); + double fval = 0.0; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(array, i); + final double fApB = val * a + b; + final double ti = t.get(i) ? hiTarget : loTarget; + if (fApB >= 0) { + fval += ti * fApB + Math.log(1 + Math.exp(-fApB)); + } else { + fval += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); + } + } + for (int it = 0; it < maxiter; it++) { + // logger.debugFinest("Iter: " + it + "a: " + a + " b: " + b); + // Update Gradient and Hessian (use H’ = H + sigma I) + double h11 = sigma; + double h22 = sigma; + double h21 = 0.0; + double g1 = 0.0; + double g2 = 0.0; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(array, i); + final double fApB = val * a + b; + final double p; + final double q; + if (fApB >= 0) { + p = Math.exp(-fApB) / (1.0 + Math.exp(-fApB)); + q = 1.0 / (1.0 + Math.exp(-fApB)); + } else { + p = 1.0 / (1.0 + Math.exp(fApB)); + q = Math.exp(fApB) / (1.0 + Math.exp(fApB)); + } + final double d2 = p * q; + h11 += val * val * d2; + h22 += d2; + h21 += val * d2; + final double d1 = (t.get(i) ? hiTarget : loTarget) - p; + g1 += val * d1; + g2 += d1; + } + // Stop condition + if (Math.abs(g1) < 1e-5 && Math.abs(g2) < 1e-5) { + break; + } + // Compute modified Newton directions + final double det = h11 * h22 - h21 * h21; + final double dA = -(h22 * g1 - h21 * g2) / det; + final double dB = -(-h21 * g1 + h11 * g2) / det; + final double gd = g1 * dA + g2 * dB; + double stepsize = 1.0; + while (stepsize >= minstep) { // Line search + final double newA = a + stepsize * dA; + final double newB = b + stepsize * dB; + double newf = 0.0; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(array, i); + final double fApB = val * newA + newB; + final double ti = t.get(i) ? hiTarget : loTarget; + if (fApB >= 0) { + newf += ti * fApB + Math.log(1 + Math.exp(-fApB)); + } else { + newf += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); + } } - else { + if (newf < fval + 0.0001 * stepsize * gd) { + a = newA; + b = newB; + fval = newf; + break; // Sufficient decrease satisfied + } else { stepsize /= 2.0; } - if(stepsize < minstep) { + if (stepsize < minstep) { LOG.debug("Minstep hit."); break; } } - if(it + 1 >= maxiter) { + if (it + 1 >= maxiter) { LOG.debug("Maximum iterations hit."); break; } @@ -251,4 +431,4 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { public double getScaled(double value) { return 1.0 / (1 + Math.exp(-Afinal * value - Bfinal)); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java index 6c38c781..2110570e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,12 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MathUtil; -import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.Mean; +import de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax; import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -53,71 +54,42 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Interpreting and Unifying Outlier Scores", booktitle = "Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url = "http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") public class SqrtStandardDeviationScaling implements OutlierScalingFunction { /** - * Parameter to specify the fixed minimum to use. - * <p> - * Key: {@code -sqrtstddevscale.min} - * </p> + * Effective parameters. */ - public static final OptionID MIN_ID = new OptionID("sqrtstddevscale.min", "Fixed minimum to use in sqrt scaling."); + double min, mean, factor; /** - * Parameter to specify a fixed mean to use. - * <p> - * Key: {@code -sqrtstddevscale.mean} - * </p> + * Predefined parameters. */ - public static final OptionID MEAN_ID = new OptionID("sqrtstddevscale.mean", "Fixed mean to use in standard deviation scaling."); + Double pmin = null, pmean = null; /** - * Parameter to specify the lambda value - * <p> - * Key: {@code -sqrtstddevscale.lambda} - * </p> + * Predefined lambda scaling factor. */ - public static final OptionID LAMBDA_ID = new OptionID("sqrtstddevscale.lambda", "Significance level to use for error function."); - - /** - * Field storing the lambda value - */ - protected Double lambda = null; - - /** - * Min to use - */ - Double min = null; - - /** - * Mean to use - */ - Double mean = null; - - /** - * Scaling factor to use (usually: Lambda * Stddev * Sqrt(2)) - */ - double factor; + double plambda; /** * Constructor. * - * @param min - * @param mean - * @param lambda + * @param pmin Predefined minimum + * @param pmean Predefined mean + * @param plambda Lambda parameter */ - public SqrtStandardDeviationScaling(Double min, Double mean, Double lambda) { + public SqrtStandardDeviationScaling(Double pmin, Double pmean, double plambda) { super(); - this.min = min; - this.mean = mean; - this.lambda = lambda; + this.pmin = pmin; + this.pmean = pmean; + this.plambda = plambda; } @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= min) { + if (value <= min) { return 0; } value = (value <= min) ? 0 : Math.sqrt(value - min); - if(value <= mean) { + if (value <= mean) { return 0; } return Math.max(0, NormalDistribution.erf((value - mean) / factor)); @@ -125,39 +97,61 @@ public class SqrtStandardDeviationScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(min == null) { - DoubleMinMax mm = new DoubleMinMax(); - Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { - double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { - mm.put(val); - } - } - min = mm.getMin(); - } - if(mean == null) { - MeanVariance mv = new MeanVariance(); + if (pmean == null) { + MeanVarianceMinMax mv = new MeanVarianceMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); val = (val <= min) ? 0 : Math.sqrt(val - min); mv.put(val); } + min = (pmin == null) ? mv.getMin() : pmin; mean = mv.getMean(); - factor = lambda * mv.getSampleStddev() * MathUtil.SQRT2; - } - else { + factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2; + } else { + mean = pmean; double sqsum = 0; int cnt = 0; Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + double mm = Double.POSITIVE_INFINITY; + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); + mm = Math.min(mm, val); val = (val <= min) ? 0 : Math.sqrt(val - min); sqsum += (val - mean) * (val - mean); cnt += 1; } - factor = lambda * Math.sqrt(sqsum / cnt) * MathUtil.SQRT2; + min = (pmin == null) ? mm : pmin; + factor = plambda * Math.sqrt(sqsum / cnt) * MathUtil.SQRT2; + } + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (pmean == null) { + MeanVarianceMinMax mv = new MeanVarianceMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + val = (val <= min) ? 0 : Math.sqrt(val - min); + mv.put(val); + } + min = (pmin == null) ? mv.getMin() : pmin; + mean = mv.getMean(); + factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2; + } else { + mean = pmean; + Mean sqsum = new Mean(); + double mm = Double.POSITIVE_INFINITY; + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + mm = Math.min(mm, val); + val = (val <= min) ? 0 : Math.sqrt(val - min); + sqsum.put((val - mean) * (val - mean)); + } + min = (pmin == null) ? mm : pmin; + factor = plambda * Math.sqrt(sqsum.getMean()) * MathUtil.SQRT2; } } @@ -179,30 +173,54 @@ public class SqrtStandardDeviationScaling implements OutlierScalingFunction { * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Parameter to specify the fixed minimum to use. + * <p> + * Key: {@code -sqrtstddevscale.min} + * </p> + */ + public static final OptionID MIN_ID = new OptionID("sqrtstddevscale.min", "Fixed minimum to use in sqrt scaling."); + + /** + * Parameter to specify a fixed mean to use. + * <p> + * Key: {@code -sqrtstddevscale.mean} + * </p> + */ + public static final OptionID MEAN_ID = new OptionID("sqrtstddevscale.mean", "Fixed mean to use in standard deviation scaling."); + + /** + * Parameter to specify the lambda value + * <p> + * Key: {@code -sqrtstddevscale.lambda} + * </p> + */ + public static final OptionID LAMBDA_ID = new OptionID("sqrtstddevscale.lambda", "Significance level to use for error function."); + protected Double min = null; protected Double mean = null; - protected Double lambda = null; + protected double lambda; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); DoubleParameter minP = new DoubleParameter(MIN_ID); minP.setOptional(true); - if(config.grab(minP)) { - min = minP.getValue(); + if (config.grab(minP)) { + min = minP.doubleValue(); } DoubleParameter meanP = new DoubleParameter(MEAN_ID); meanP.setOptional(true); - if(config.grab(meanP)) { - mean = meanP.getValue(); + if (config.grab(meanP)) { + mean = meanP.doubleValue(); } DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 3.0); - if(config.grab(lambdaP)) { - lambda = lambdaP.getValue(); + if (config.grab(lambdaP)) { + lambda = lambdaP.doubleValue(); } } @@ -211,4 +229,4 @@ public class SqrtStandardDeviationScaling implements OutlierScalingFunction { return new SqrtStandardDeviationScaling(min, mean, lambda); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java index c8ec0905..b0040571 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.math.Mean; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -45,27 +46,18 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * Where mean can be fixed to a given value, and stddev is then computed against * this mean. * + * Reference: + * <p> + * H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek<br /> + * Interpreting and Unifying Outlier Scores<br /> + * Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011 + * </p> + * * @author Erich Schubert */ -@Reference(authors="H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title="Interpreting and Unifying Outlier Scores", booktitle="Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url="http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") +@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Interpreting and Unifying Outlier Scores", booktitle = "Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url = "http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") public class StandardDeviationScaling implements OutlierScalingFunction { /** - * Parameter to specify a fixed mean to use. - * <p> - * Key: {@code -stddevscale.mean} - * </p> - */ - public static final OptionID MEAN_ID = new OptionID("stddevscale.mean", "Fixed mean to use in standard deviation scaling."); - - /** - * Parameter to specify the lambda value - * <p> - * Key: {@code -stddevscale.lambda} - * </p> - */ - public static final OptionID LAMBDA_ID = new OptionID("stddevscale.lambda", "Significance level to use for error function."); - - /** * Field storing the fixed mean to use */ protected Double fixedmean = null; @@ -73,7 +65,7 @@ public class StandardDeviationScaling implements OutlierScalingFunction { /** * Field storing the lambda value */ - protected Double lambda = null; + protected double lambda; /** * Mean to use @@ -88,10 +80,10 @@ public class StandardDeviationScaling implements OutlierScalingFunction { /** * Constructor. * - * @param fixedmean - * @param lambda + * @param fixedmean Fixed mean + * @param lambda Scaling factor lambda */ - public StandardDeviationScaling(Double fixedmean, Double lambda) { + public StandardDeviationScaling(Double fixedmean, double lambda) { super(); this.fixedmean = fixedmean; this.lambda = lambda; @@ -107,7 +99,7 @@ public class StandardDeviationScaling implements OutlierScalingFunction { @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= mean) { + if (value <= mean) { return 0; } return Math.max(0, NormalDistribution.erf((value - mean) / factor)); @@ -115,12 +107,12 @@ public class StandardDeviationScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(fixedmean == null) { + if (fixedmean == null) { MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); } } @@ -129,14 +121,46 @@ public class StandardDeviationScaling implements OutlierScalingFunction { if (factor == 0.0) { factor = Double.MIN_NORMAL; } - } - else { + } else { mean = fixedmean; Mean sqsum = new Mean(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + sqsum.put((val - mean) * (val - mean)); + } + } + factor = lambda * Math.sqrt(sqsum.getMean()) * MathUtil.SQRT2; + if (factor == 0.0) { + factor = Double.MIN_NORMAL; + } + } + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (fixedmean == null) { + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { + mv.put(val); + } + } + mean = mv.getMean(); + factor = lambda * mv.getSampleStddev() * MathUtil.SQRT2; + if (factor == 0.0) { + factor = Double.MIN_NORMAL; + } + } else { + mean = fixedmean; + Mean sqsum = new Mean(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { sqsum.put((val - mean) * (val - mean)); } } @@ -165,20 +189,36 @@ public class StandardDeviationScaling implements OutlierScalingFunction { * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Parameter to specify a fixed mean to use. + * <p> + * Key: {@code -stddevscale.mean} + * </p> + */ + public static final OptionID MEAN_ID = new OptionID("stddevscale.mean", "Fixed mean to use in standard deviation scaling."); + + /** + * Parameter to specify the lambda value + * <p> + * Key: {@code -stddevscale.lambda} + * </p> + */ + public static final OptionID LAMBDA_ID = new OptionID("stddevscale.lambda", "Significance level to use for error function."); + protected Double fixedmean = null; - protected Double lambda = null; + protected double lambda; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); DoubleParameter meanP = new DoubleParameter(MEAN_ID); meanP.setOptional(true); - if(config.grab(meanP)) { + if (config.grab(meanP)) { fixedmean = meanP.getValue(); } DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 3.0); - if(config.grab(lambdaP)) { + if (config.grab(lambdaP)) { lambda = lambdaP.getValue(); } } @@ -188,4 +228,4 @@ public class StandardDeviationScaling implements OutlierScalingFunction { return new StandardDeviationScaling(fixedmean, lambda); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java index f8609f3b..25103dbc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,6 +26,9 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; @@ -94,29 +97,44 @@ public class TopKOutlierScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(k <= 0) { + if (k <= 0) { LoggingUtil.warning("No k configured for Top-k outlier scaling!"); } DBIDIter order = or.getOrdering().iter(or.getOrdering().getDBIDs()).iter(); - for(int i = 0; i < k && order.valid(); i++, order.advance()) { + for (int i = 0; i < k && order.valid(); i++, order.advance()) { cutoff = or.getScores().get(order); } max = or.getOutlierMeta().getActualMaximum(); ground = or.getOutlierMeta().getTheoreticalBaseline(); - if(Double.isInfinite(ground) || Double.isNaN(ground)) { + if (Double.isInfinite(ground) || Double.isNaN(ground)) { ground = or.getOutlierMeta().getTheoreticalMinimum(); } - if(Double.isInfinite(ground) || Double.isNaN(ground)) { + if (Double.isInfinite(ground) || Double.isNaN(ground)) { ground = or.getOutlierMeta().getActualMinimum(); } - if(Double.isInfinite(ground) || Double.isNaN(ground)) { + if (Double.isInfinite(ground) || Double.isNaN(ground)) { ground = Math.min(0.0, cutoff); } } @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (k <= 0) { + LoggingUtil.warning("No k configured for Top-k outlier scaling!"); + } + double[] scores = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter); + QuickSelect.quickSelect(scores, k); + cutoff = scores[k - 1]; + max = Double.NEGATIVE_INFINITY; + for (double v : scores) { + max = Math.max(max, v); + } + ground = Math.min(0.0, cutoff); + } + + @Override public double getMax() { - if(binary) { + if (binary) { return 1.0; } return max; @@ -124,7 +142,7 @@ public class TopKOutlierScaling implements OutlierScalingFunction { @Override public double getMin() { - if(binary) { + if (binary) { return 0.0; } return ground; @@ -132,19 +150,16 @@ public class TopKOutlierScaling implements OutlierScalingFunction { @Override public double getScaled(double value) { - if(binary) { - if(value >= cutoff) { + if (binary) { + if (value >= cutoff) { return 1; - } - else { + } else { return 0; } - } - else { - if(value >= cutoff) { + } else { + if (value >= cutoff) { return (value - ground) / (max - ground); - } - else { + } else { return 0.0; } } @@ -167,12 +182,12 @@ public class TopKOutlierScaling implements OutlierScalingFunction { super.makeOptions(config); IntParameter kP = new IntParameter(K_ID); kP.addConstraint(new GreaterConstraint(1)); - if(config.grab(kP)) { + if (config.grab(kP)) { k = kP.intValue(); } Flag binaryF = new Flag(BINARY_ID); - if(config.grab(binaryF)) { + if (config.grab(binaryF)) { binary = binaryF.isTrue(); } } @@ -182,4 +197,4 @@ public class TopKOutlierScaling implements OutlierScalingFunction { return new TopKOutlierScaling(k, binary); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java index c2adba6b..a34aac08 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java index 430f1bf7..633aa65a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |