summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:40 +0000
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:40 +0000
commit337087b668d3a54f3afee3a9adb597a32e9f7e94 (patch)
treed860094269622472f8079d497ac7af02dbb4e038 /src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
parent14a486343aef55f97f54082d6b542dedebf6f3ba (diff)
Import Upstream version 0.6.5~20141030
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java232
1 files changed, 141 insertions, 91 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
index 28fcf01b..611701ab 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,15 +34,16 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -59,18 +60,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
- * INFLO provides the Mining Algorithms (Two-way Search Method) for Influence
- * Outliers using Symmetric Relationship
- * <p>
+ * Influence Outliers using Symmetric Relationship (INFLO) using two-way search,
+ * is an outlier detection method based on LOF; but also using the reverse kNN.
+ *
* Reference: <br>
* <p>
- * Jin, W., Tung, A., Han, J., and Wang, W. 2006<br/>
- * Ranking outliers using symmetric neighborhood relationship<br/>
- * In Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD),
- * Singapore
+ * W. Jin, A. Tung, J. Han, and W. Wang<br />
+ * Ranking outliers using symmetric neighborhood relationship<br />
+ * Proc. 10th Pacific-Asia conference on Advances in Knowledge Discovery and
+ * Data Mining, 2006.
* </p>
*
* @author Ahmed Hettab
+ * @author Erich Schubert
*
* @apiviz.has KNNQuery
*
@@ -78,35 +80,23 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("INFLO: Influenced Outlierness Factor")
@Description("Ranking Outliers Using Symmetric Neigborhood Relationship")
-@Reference(authors = "Jin, W., Tung, A., Han, J., and Wang, W", title = "Ranking outliers using symmetric neighborhood relationship", booktitle = "Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD), Singapore, 2006", url = "http://dx.doi.org/10.1007/11731139_68")
-public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "W. Jin, A. Tung, J. Han, and W. Wang", //
+title = "Ranking outliers using symmetric neighborhood relationship", //
+booktitle = "Proc. 10th Pacific-Asia conference on Advances in Knowledge Discovery and Data Mining", //
+url = "http://dx.doi.org/10.1007/11731139_68")
+public class INFLO<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(INFLO.class);
/**
- * Parameter to specify if any object is a Core Object must be a double
- * greater than 0.0
- * <p>
- * see paper "Two-way search method" 3.2
- */
- public static final OptionID M_ID = new OptionID("inflo.m", "The threshold");
-
- /**
- * Holds the value of {@link #M_ID}.
+ * Pruning threshold m.
*/
private double m;
/**
- * Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its INFLO_SCORE. must be an integer greater than
- * 1.
- */
- public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
-
- /**
- * Holds the value of {@link #K_ID}.
+ * Number of neighbors to use.
*/
private int k;
@@ -117,7 +107,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* @param m m Parameter
* @param k k Parameter
*/
- public INFLO(DistanceFunction<? super O, D> distanceFunction, double m, int k) {
+ public INFLO(DistanceFunction<? super O> distanceFunction, double m, int k) {
super(distanceFunction);
this.m = m;
this.k = k;
@@ -131,9 +121,9 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k + 1, DatabaseQuery.HINT_HEAVY_USE);
- ModifiableDBIDs processedIDs = DBIDUtil.newHashSet(relation.size());
ModifiableDBIDs pruned = DBIDUtil.newHashSet();
// KNNS
WritableDataStore<ModifiableDBIDs> knns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
@@ -147,72 +137,112 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
rnns.put(iditer, DBIDUtil.newArray());
}
- // TODO: use kNN preprocessor?
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
+ computeNeighborhoods(relation, knnQuery, pruned, knns, rnns, density);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- // if not visited count=0
- int count = rnns.get(id).size();
- if(!processedIDs.contains(id)) {
- // TODO: use exactly k neighbors?
- KNNList<D> list = knnQuery.getKNNForDBID(id, k);
- knns.get(id).addDBIDs(list);
- processedIDs.add(id);
- density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
+ // Calculate INFLO for any Object
+ DoubleMinMax inflominmax = new DoubleMinMax();
+ WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ // Note: this modifies knns, by adding rknns!
+ computeINFLO(relation, pruned, knns, rnns, density, inflos, inflominmax);
- }
- ModifiableDBIDs s = knns.get(id);
- for(DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
- if(!processedIDs.contains(q)) {
- // TODO: use exactly k neighbors?
- KNNList<D> listQ = knnQuery.getKNNForDBID(q, k);
- knns.get(q).addDBIDs(listQ);
- density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
- processedIDs.add(q);
- }
+ // Build result representation.
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Influence Outlier Score", "inflo-outlier", inflos, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
- if(knns.get(q).contains(id)) {
- rnns.get(q).add(id);
- rnns.get(id).add(q);
+ /**
+ * Compute neighborhoods
+ *
+ * @param relation
+ * @param knnQuery
+ * @param pruned
+ * @param knns
+ * @param rnns
+ * @param density
+ */
+ protected void computeNeighborhoods(Relation<O> relation, KNNQuery<O> knnQuery, ModifiableDBIDs pruned, WritableDataStore<ModifiableDBIDs> knns, WritableDataStore<ModifiableDBIDs> rnns, WritableDoubleDataStore density) {
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ // if not visited count=0
+ int count = rnns.get(iter).size();
+ DBIDs knn = getKNN(iter, knnQuery, knns, density);
+ for(DBIDIter niter = knn.iter(); niter.valid(); niter.advance()) {
+ // Ignore the query point itself.
+ if(DBIDUtil.equal(iter, niter)) {
+ continue;
+ }
+ if(getKNN(niter, knnQuery, knns, density).contains(iter)) {
+ rnns.get(niter).add(iter);
+ rnns.get(iter).add(niter);
count++;
}
}
- if(count >= s.size() * m) {
- pruned.add(id);
+ if(count >= knn.size() * m) {
+ pruned.add(iter);
}
}
+ }
- // Calculate INFLO for any Object
- // IF Object is pruned INFLO=1.0
- DoubleMinMax inflominmax = new DoubleMinMax();
- WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- if(!pruned.contains(id)) {
- ModifiableDBIDs knn = knns.get(id);
- ModifiableDBIDs rnn = rnns.get(id);
-
- double denP = density.doubleValue(id);
- knn.addDBIDs(rnn);
- Mean mean = new Mean();
- for(DBIDIter iter = knn.iter(); iter.valid(); iter.advance()) {
- mean.put(density.doubleValue(iter));
+ /**
+ * Compute the final INFLO scores.
+ *
+ * @param relation Data relation
+ * @param pruned Pruned objects
+ * @param knns kNN storage
+ * @param rnns reverse kNN storage
+ * @param density Density estimation
+ * @param inflos Inflo score storage
+ * @param inflominmax Output of minimum and maximum
+ */
+ protected void computeINFLO(Relation<O> relation, ModifiableDBIDs pruned, WritableDataStore<ModifiableDBIDs> knns, WritableDataStore<ModifiableDBIDs> rnns, WritableDoubleDataStore density, WritableDoubleDataStore inflos, DoubleMinMax inflominmax) {
+ Mean mean = new Mean();
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if(pruned.contains(iter)) {
+ inflos.putDouble(iter, 1.);
+ inflominmax.put(1.);
+ continue;
+ }
+ ModifiableDBIDs knn = knns.get(iter), rnn = rnns.get(iter);
+ knn.addDBIDs(rnn);
+ // Compute mean density of NN \cup RNN
+ mean.reset();
+ for(DBIDIter niter = knn.iter(); niter.valid(); niter.advance()) {
+ if(DBIDUtil.equal(iter, niter)) {
+ continue;
}
- double den = mean.getMean() / denP;
- inflos.putDouble(id, den);
- // update minimum and maximum
- inflominmax.put(den);
-
+ mean.put(density.doubleValue(niter));
}
- if(pruned.contains(id)) {
- inflos.putDouble(id, 1.0);
- inflominmax.put(1.0);
+ double denP = density.doubleValue(iter);
+ double den;
+ if(denP > 0.) {
+ den = mean.getMean() / denP;
}
+ else {
+ den = mean.getMean() == 0 ? 1. : Double.POSITIVE_INFINITY;
+ }
+ inflos.putDouble(iter, den);
+ // update minimum and maximum
+ inflominmax.put(den);
}
+ }
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
- return new OutlierResult(scoreMeta, scoreResult);
+ /**
+ * Get the (forward only) kNN of an object, including the query point
+ *
+ * @param q Query point
+ * @param knnQuery Query function
+ * @param knns kNN storage
+ * @param density Density storage
+ * @return Neighbor list
+ */
+ protected DBIDs getKNN(DBIDIter q, KNNQuery<O> knnQuery, WritableDataStore<ModifiableDBIDs> knns, WritableDoubleDataStore density) {
+ ModifiableDBIDs s = knns.get(q);
+ if(s.size() == 0) {
+ KNNList listQ = knnQuery.getKNNForDBID(q, k + 1);
+ s.addDBIDs(listQ);
+ density.putDouble(q, 1. / listQ.getKNNDistance());
+ }
+ return s;
}
@Override
@@ -232,29 +262,49 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify if any object is a Core Object must be a double
+ * greater than 0.0
+ *
+ * see paper "Two-way search method" 3.2
+ */
+ public static final OptionID M_ID = new OptionID("inflo.m", "The pruning threshold");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its INFLO score.
+ */
+ public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO score.");
+
+ /**
+ * M parameter
+ */
protected double m = 1.0;
+ /**
+ * Number of neighbors to use.
+ */
protected int k = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
- mP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ final DoubleParameter mP = new DoubleParameter(M_ID, 1.0)//
+ .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if(config.grab(mP)) {
m = mP.doubleValue();
}
- final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ final IntParameter kP = new IntParameter(K_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.intValue();
}
}
@Override
- protected INFLO<O, D> makeInstance() {
+ protected INFLO<O> makeInstance() {
return new INFLO<>(distanceFunction, m, k);
}
}