summary refs log tree commit diff
path: root/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java
diff options
context:
space:
mode:
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java')
-rw-r--r-- elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java 172
1 files changed, 172 insertions, 0 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java
new file mode 100644
index 00000000..122e93b2
--- /dev/null
+++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java
@@ -0,0 +1,172 @@
+package de.lmu.ifi.dbs.elki.utilities.scaling.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2015
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta.BestFitEstimator;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * CDF based outlier score scaling: fits a distribution to the scores and maps each score through its CDF.
+ *
+ * Enhanced version of the scaling proposed in:
+ * <p>
+ * H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek<br />
+ * Interpreting and Unifying Outlier Scores<br />
+ * Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011
+ * </p>
+ *
+ * See also:
+ * <p>
+ * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek<br />
+ * Outlier Detection in Arbitrarily Oriented Subspaces<br />
+ * in: Proc. IEEE International Conference on Data Mining (ICDM 2012)
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Interpreting and Unifying Outlier Scores", booktitle = "Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url = "http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf")
+public class COPOutlierScaling implements OutlierScalingFunction {
+ /**
+ * Phi parameter. Only applied by getScaled when greater than 0; otherwise the CDF-based score is returned as is.
+ */
+ private double phi = 0.;
+
+ /**
+ * Score distribution, assigned by the prepare methods; while null, getScaled throws an AbortException.
+ */
+ private Distribution dist;
+
+ /**
+ * Inversion flag: when true, getScaled uses 1 - cdf(value) instead of cdf(value).
+ */
+ private boolean inverted = false;
+
+ /**
+ * Constructor.
+ *
+ * @param phi Phi parameter (0 or less: getScaled returns the CDF-based score without the phi adjustment)
+ */
+ public COPOutlierScaling(double phi) {
+ super();
+ this.phi = phi;
+ }
+
+ /**
+ * Secondary reference: empty method that only carries the annotation below.
+ */
+ @Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)")
+ public static final void secondReference() {
+ // Dummy, reference attachment point only.
+ }
+
+ @Override
+ public double getScaled(double value) {
+ if (dist == null) { // no prepare method has assigned a distribution yet
+ throw new AbortException("Programming error: outlier scaling not initialized.");
+ }
+ double s = inverted ? (1 - dist.cdf(value)) : dist.cdf(value); // flip the CDF value when the inversion flag is set
+ return (phi > 0.) ? (phi * s) / (1 - s + phi) : s; // with phi > 0: keeps s = 0 and s = 1 fixed and lowers values in between (for s in [0, 1]); else s unchanged
+ }
+
+ @Override
+ public double getMin() {
+ return 0.; // constant, independent of phi and of the fitted distribution
+ }
+
+ @Override
+ public double getMax() {
+ return 1.; // constant, independent of phi and of the fitted distribution
+ }
+
+ @Override
+ public void prepare(OutlierResult or) {
+ double[] s; // receives one entry per score, filled in the scoped block below
+ { // local scope for the temporaries used while copying the scores
+ DoubleRelation scores = or.getScores();
+ s = new double[scores.size()];
+ int i = 0;
+ for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance(), i++) {
+ s[i] = scores.doubleValue(id);
+ }
+ }
+ Arrays.sort(s); // sort ascending before estimation (presumably expected by the estimator - TODO confirm)
+ dist = BestFitEstimator.STATIC.estimate(s, ArrayLikeUtil.DOUBLEARRAYADAPTER); // distribution estimated from the sorted score sample
+ inverted = (or.getOutlierMeta() instanceof InvertedOutlierScoreMeta); // invert only when the result's meta is an InvertedOutlierScoreMeta
+ }
+
+ @Override
+ public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
+ double[] s = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter); // presumably a fresh copy, so the sort below would not touch the caller's data - TODO confirm
+ Arrays.sort(s); // same preprocessing as in prepare(OutlierResult)
+ dist = BestFitEstimator.STATIC.estimate(s, ArrayLikeUtil.DOUBLEARRAYADAPTER);
+ inverted = false; // Not supported: this overload receives no OutlierResult meta to inspect
+ }
+
+ /**
+ * Parameterization class: reads the phi option and builds the scaling instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Option ID of the phi parameter.
+ */
+ public static final OptionID PHI_ID = new OptionID("copscaling.phi", "Phi parameter, expected rate of outliers. Set to 0 to use raw CDF values.");
+
+ /**
+ * Phi value; remains 0 unless config.grab accepts the parameter in makeOptions.
+ */
+ private double phi = 0.;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleParameter phiP = new DoubleParameter(PHI_ID);
+ if (config.grab(phiP)) { // only overwrite the field when the option was grabbed
+ phi = phiP.doubleValue();
+ }
+ }
+
+ @Override
+ protected COPOutlierScaling makeInstance() {
+ return new COPOutlierScaling(phi);
+ }
+ }
+}