summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java198
1 files changed, 91 insertions, 107 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
index d54b053f..8fce6503 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,26 +28,19 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -56,6 +49,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -68,28 +62,30 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* Reference:
* <p>
* Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
- * Local outlier detection reconsidered: a generalized view on locality with
- * applications to spatial, video, and network outlier detection<br />
- * In: Data Mining and Knowledge Discovery
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
* </p>
*
* @author Erich Schubert
*
* @apiviz.has KNNQuery
*
- * @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> Distance type
+ * @param <O> the type of data objects handled by this algorithm
*/
-@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title = "Local outlier detection reconsidered: a generalized view on locality with applications to spatial, video, and network outlier detection", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
-@Alias({ "SimpleLOF", "outlier.SimpleLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF" })
-public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+@Alias({ "SimplifiedLOF", "outlier.SimplifiedLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimplifiedLOF" })
+public class SimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(SimplifiedLOF.class);
/**
- * Parameter k.
+ * The number of neighbors to query, excluding the query point.
*/
protected int k;
@@ -98,7 +94,7 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
*
* @param k the value of k
*/
- public SimplifiedLOF(int k, DistanceFunction<? super O, D> distance) {
+ public SimplifiedLOF(int k, DistanceFunction<? super O> distance) {
super(distance);
this.k = k + 1;
}
@@ -111,114 +107,103 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
* @return LOF outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null;
-
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Simplified LOF", 3) : null;
DBIDs ids = relation.getDBIDs();
- // "HEAVY" flag for KNN Query since it is used more than once
- KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if(!(knnq instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- database.addIndex(preproc);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
- knnq = preproc.getKNNQuery(rdq, k);
- }
+ LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
// Compute LRDs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing densities.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing densities.");
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
- for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ computeSimplifiedLRDs(ids, knnq, dens);
+
+ // compute LOF_SCORE of each db object
+ LOG.beginStep(stepprog, 3, "Computing SLOFs.");
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DoubleMinMax lofminmax = new DoubleMinMax();
+ computeSimplifiedLOFs(ids, knnq, dens, lofs, lofminmax);
+
+ LOG.setCompleted(stepprog);
+
+ // Build result representation.
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ /**
+ * Compute the simplified reachability densities.
+ *
+ * @param ids IDs to process
+ * @param knnq kNN query class
+ * @param lrds Density output
+ */
+ private void computeSimplifiedLRDs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore lrds) {
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
double sum = 0.0;
int count = 0;
- if(neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- sum += neighbor.doubleDistance();
- count++;
- }
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- sum += neighbor.getDistance().doubleValue();
- count++;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
}
+ sum += neighbor.doubleValue();
+ count++;
}
// Avoid division by 0
- final double lrd = (sum > 0) ? (count / sum) : 0;
- dens.putDouble(it, lrd);
- if(densProgress != null) {
- densProgress.incrementProcessed(LOG);
- }
- }
- if(densProgress != null) {
- densProgress.ensureCompleted(LOG);
- }
-
- // compute LOF_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing SLOFs.", LOG);
+ final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
+ lrds.putDouble(iter, lrd);
+ LOG.incrementProcessed(lrdsProgress);
}
- WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- // track the maximum value for normalization.
- DoubleMinMax lofminmax = new DoubleMinMax();
+ LOG.ensureCompleted(lrdsProgress);
+ }
- FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null;
- for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final double lrdp = dens.doubleValue(it);
+ /**
+ * Compute the simplified LOF factors.
+ *
+ * @param ids IDs to compute for
+ * @param knnq kNN query class
+ * @param slrds Object densities
+ * @param lofs SLOF output storage
+ * @param lofminmax Minimum and maximum scores
+ */
+ private void computeSimplifiedLOFs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore slrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simplified LOF scores.", ids.size(), LOG) : null;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final double lof;
- if(lrdp > 0) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
- double sum = 0.0;
+ final double lrdp = slrds.doubleValue(iter);
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
+ if(!Double.isInfinite(lrdp)) {
+ double sum = 0.;
int count = 0;
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if(DBIDUtil.equal(neighbor, it)) {
+ if(DBIDUtil.equal(neighbor, iter)) {
continue;
}
- sum += dens.doubleValue(neighbor);
+ final double val = slrds.doubleValue(neighbor);
+ sum += val;
count++;
+ if(Double.isInfinite(val)) {
+ break;
+ }
}
- lof = sum / (count * lrdp);
+ lof = sum / (lrdp * count);
}
else {
lof = 1.0;
}
- lofs.putDouble(it, lof);
+ lofs.putDouble(iter, lof);
// update minimum and maximum
lofminmax.put(lof);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
- }
-
- if(stepprog != null) {
- stepprog.setCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
-
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids);
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
- OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
-
- return result;
+ LOG.ensureCompleted(progressLOFs);
}
@Override
@@ -238,10 +223,9 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
*
* @apiviz.exclude
*
- * @param <O> vector type
- * @param <D> distance type
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* The neighborhood size to use.
*/
@@ -252,14 +236,14 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
super.makeOptions(config);
final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
- pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(pK)) {
k = pK.getValue();
}
}
@Override
- protected SimplifiedLOF<O, D> makeInstance() {
+ protected SimplifiedLOF<O> makeInstance() {
return new SimplifiedLOF<>(k, distanceFunction);
}
}